@intuned/browser-dev 0.1.4-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. package/.babelrc +21 -0
  2. package/.eslintignore +10 -0
  3. package/.eslintrc.js +39 -0
  4. package/BROWSER_SCRIPTS_SETUP.md +84 -0
  5. package/LICENSE +43 -0
  6. package/README.md +160 -0
  7. package/RELEASE.md +60 -0
  8. package/dist/ai/export.d.js +5 -0
  9. package/dist/ai/export.d.ts +641 -0
  10. package/dist/ai/extractStructuredData.js +320 -0
  11. package/dist/ai/extractStructuredDataUsingAi.js +142 -0
  12. package/dist/ai/extractionHelpers/screenshotHelpers.js +56 -0
  13. package/dist/ai/extractionHelpers/validateSchema.js +148 -0
  14. package/dist/ai/index.d.ts +641 -0
  15. package/dist/ai/index.js +19 -0
  16. package/dist/ai/isPageLoaded.js +80 -0
  17. package/dist/ai/prompt.js +39 -0
  18. package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +137 -0
  19. package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
  20. package/dist/ai/tests/testExtractStructuredData.spec.js +646 -0
  21. package/dist/ai/tests/testIsPageLoaded.spec.js +277 -0
  22. package/dist/ai/tools/index.js +48 -0
  23. package/dist/ai/types/errors.js +67 -0
  24. package/dist/ai/types/models.js +45 -0
  25. package/dist/ai/types/types.js +48 -0
  26. package/dist/ai/validators.js +167 -0
  27. package/dist/common/Logger/index.js +60 -0
  28. package/dist/common/Logger/types.js +5 -0
  29. package/dist/common/SdkError.js +50 -0
  30. package/dist/common/aiModelsValidations.js +32 -0
  31. package/dist/common/ensureBrowserScripts.js +14 -0
  32. package/dist/common/extendedTest.js +157 -0
  33. package/dist/common/extractionHelpers.js +19 -0
  34. package/dist/common/formatZodError.js +18 -0
  35. package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
  36. package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
  37. package/dist/common/fuzzySearch/utils.js +23 -0
  38. package/dist/common/getModelProvider.js +18 -0
  39. package/dist/common/getSimplifiedHtml.js +122 -0
  40. package/dist/common/hashObject.js +32 -0
  41. package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
  42. package/dist/common/html2markdown/index.js +19 -0
  43. package/dist/common/jwtTokenManager.js +57 -0
  44. package/dist/common/loadRuntime.js +16 -0
  45. package/dist/common/locatorHelpers.js +41 -0
  46. package/dist/common/matching/collectStrings.js +32 -0
  47. package/dist/common/matching/levenshtein.js +40 -0
  48. package/dist/common/matching/matching.js +317 -0
  49. package/dist/common/matching/types.js +1 -0
  50. package/dist/common/noEmpty.js +9 -0
  51. package/dist/common/saveSnapshotWithExamples.js +60 -0
  52. package/dist/common/script.js +2602 -0
  53. package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
  54. package/dist/common/xpathMapping.js +107 -0
  55. package/dist/helpers/clickUntilExhausted.js +85 -0
  56. package/dist/helpers/downloadFile.js +125 -0
  57. package/dist/helpers/export.d.js +5 -0
  58. package/dist/helpers/export.d.ts +1220 -0
  59. package/dist/helpers/extractMarkdown.js +35 -0
  60. package/dist/helpers/filterEmptyValues.js +54 -0
  61. package/dist/helpers/gotoUrl.js +98 -0
  62. package/dist/helpers/index.d.ts +1220 -0
  63. package/dist/helpers/index.js +122 -0
  64. package/dist/helpers/processDate.js +25 -0
  65. package/dist/helpers/resolveUrl.js +64 -0
  66. package/dist/helpers/sanitizeHtml.js +74 -0
  67. package/dist/helpers/saveFileToS3.js +50 -0
  68. package/dist/helpers/scrollToLoadContent.js +57 -0
  69. package/dist/helpers/tests/testClickUntilExhausted.spec.js +372 -0
  70. package/dist/helpers/tests/testDownloadFile.spec.js +206 -0
  71. package/dist/helpers/tests/testExtractMarkdown.spec.js +290 -0
  72. package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
  73. package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
  74. package/dist/helpers/tests/testProcessDate.spec.js +13 -0
  75. package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
  76. package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
  77. package/dist/helpers/tests/testScrollToLoadContent.spec.js +163 -0
  78. package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +342 -0
  79. package/dist/helpers/tests/testWithDomSettledWait.spec.js +164 -0
  80. package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +114 -0
  81. package/dist/helpers/types/Attachment.js +115 -0
  82. package/dist/helpers/types/CustomTypeRegistry.js +48 -0
  83. package/dist/helpers/types/RunEnvironment.js +18 -0
  84. package/dist/helpers/types/ValidationError.js +17 -0
  85. package/dist/helpers/types/index.js +51 -0
  86. package/dist/helpers/uploadFileToS3.js +154 -0
  87. package/dist/helpers/utils/getS3Client.js +22 -0
  88. package/dist/helpers/utils/index.js +73 -0
  89. package/dist/helpers/utils/isDownload.js +10 -0
  90. package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
  91. package/dist/helpers/utils/isLocator.js +9 -0
  92. package/dist/helpers/utils/jwtTokenManager.js +18 -0
  93. package/dist/helpers/validateDataUsingSchema.js +103 -0
  94. package/dist/helpers/waitForDomSettled.js +90 -0
  95. package/dist/helpers/withNetworkSettledWait.js +91 -0
  96. package/dist/index.d.js +16 -0
  97. package/dist/index.d.ts +10 -0
  98. package/dist/index.js +16 -0
  99. package/dist/intunedServices/ApiGateway/aiApiGateway.js +143 -0
  100. package/dist/intunedServices/ApiGateway/factory.js +16 -0
  101. package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
  102. package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
  103. package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
  104. package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +355 -0
  105. package/dist/intunedServices/ApiGateway/types.js +11 -0
  106. package/dist/intunedServices/cache/cache.js +61 -0
  107. package/dist/intunedServices/cache/index.js +12 -0
  108. package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
  109. package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
  110. package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
  111. package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +135 -0
  112. package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
  113. package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +132 -0
  114. package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
  115. package/dist/optimized-extractors/common/findTableHeaders.js +162 -0
  116. package/dist/optimized-extractors/common/index.js +55 -0
  117. package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +84 -0
  118. package/dist/optimized-extractors/common/matching/matching.js +212 -0
  119. package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
  120. package/dist/optimized-extractors/common/matching/types.js +18 -0
  121. package/dist/optimized-extractors/common/matching/utils.js +184 -0
  122. package/dist/optimized-extractors/common/utils.js +58 -0
  123. package/dist/optimized-extractors/export.d.js +5 -0
  124. package/dist/optimized-extractors/export.d.ts +397 -0
  125. package/dist/optimized-extractors/extractArray.js +120 -0
  126. package/dist/optimized-extractors/extractObject.js +104 -0
  127. package/dist/optimized-extractors/index.d.ts +397 -0
  128. package/dist/optimized-extractors/index.js +31 -0
  129. package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +269 -0
  130. package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
  131. package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
  132. package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
  133. package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromLocator.spec.js +146 -0
  134. package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromPage.spec.js +130 -0
  135. package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
  136. package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +160 -0
  137. package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
  138. package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
  139. package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +243 -0
  140. package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
  141. package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
  142. package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
  143. package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
  144. package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
  145. package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
  146. package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
  147. package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
  148. package/dist/optimized-extractors/models/anthropicModel.js +23 -0
  149. package/dist/optimized-extractors/models/openaiModel.js +23 -0
  150. package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
  151. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
  152. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
  153. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
  154. package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
  155. package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
  156. package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
  157. package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
  158. package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
  159. package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
  160. package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
  161. package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
  162. package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
  163. package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
  164. package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
  165. package/dist/optimized-extractors/types/errors.js +42 -0
  166. package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
  167. package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
  168. package/dist/optimized-extractors/types/types.js +5 -0
  169. package/dist/optimized-extractors/validators.js +152 -0
  170. package/dist/types/intuned-runtime.d.js +1 -0
  171. package/dist/types/intuned-runtime.d.ts +64 -0
  172. package/dist/vite-env.d.js +1 -0
  173. package/dist/vite-env.d.ts +9 -0
  174. package/generated-docs/ai/functions/extractStructuredData.mdx +255 -0
  175. package/generated-docs/ai/functions/isPageLoaded.mdx +88 -0
  176. package/generated-docs/ai/interfaces/ArraySchema.mdx +36 -0
  177. package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
  178. package/generated-docs/ai/interfaces/BooleanSchema.mdx +28 -0
  179. package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
  180. package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
  181. package/generated-docs/ai/interfaces/NumberSchema.mdx +35 -0
  182. package/generated-docs/ai/interfaces/ObjectSchema.mdx +39 -0
  183. package/generated-docs/ai/interfaces/StringSchema.mdx +35 -0
  184. package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
  185. package/generated-docs/ai/type-aliases/ContentItem.mdx +12 -0
  186. package/generated-docs/ai/type-aliases/JsonSchema.mdx +47 -0
  187. package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +85 -0
  188. package/generated-docs/helpers/functions/downloadFile.mdx +99 -0
  189. package/generated-docs/helpers/functions/extractMarkdown.mdx +56 -0
  190. package/generated-docs/helpers/functions/filterEmptyValues.mdx +51 -0
  191. package/generated-docs/helpers/functions/goToUrl.mdx +124 -0
  192. package/generated-docs/helpers/functions/processDate.mdx +55 -0
  193. package/generated-docs/helpers/functions/resolveUrl.mdx +165 -0
  194. package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
  195. package/generated-docs/helpers/functions/saveFileToS3.mdx +127 -0
  196. package/generated-docs/helpers/functions/scrollToLoadContent.mdx +89 -0
  197. package/generated-docs/helpers/functions/uploadFileToS3.mdx +121 -0
  198. package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +90 -0
  199. package/generated-docs/helpers/functions/waitForDomSettled.mdx +91 -0
  200. package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +76 -0
  201. package/generated-docs/helpers/interfaces/Attachment.mdx +56 -0
  202. package/generated-docs/helpers/interfaces/S3Configs.mdx +52 -0
  203. package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
  204. package/generated-docs/helpers/type-aliases/AttachmentType.mdx +10 -0
  205. package/generated-docs/helpers/type-aliases/FileType.mdx +61 -0
  206. package/generated-docs/helpers/type-aliases/Trigger.mdx +62 -0
  207. package/how-to-generate-docs.md +61 -0
  208. package/how-to-run-tests.md +42 -0
  209. package/intuned-runtime-setup.md +13 -0
  210. package/package.json +124 -0
  211. package/tsconfig.eslint.json +5 -0
  212. package/tsconfig.json +26 -0
@@ -0,0 +1,277 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.extractPropertiesUsingGPTFromArray = extractPropertiesUsingGPTFromArray;
7
+ var _neverthrow = require("neverthrow");
8
+ var _errors = require("../errors");
9
+ var _common = require("../../common/");
10
+ var _isTableHeaderOrFooter = require("../../common/isTableHeaderOrFooter");
11
+ var _Logger = require("../../../common/Logger");
12
+ var _extractionHelpers = require("../../../common/extractionHelpers");
13
+ var _buildExamplesPrompt = require("../../common/buildExamplesPrompt");
14
+ var _utils = require("../../common/utils");
15
/**
 * Public entry point: forwards an array-extraction request to the
 * implementation that matches `input.strategy.type`.
 *
 * @param {object} input - Extraction request; passed through untouched to the
 *   selected strategy implementation.
 * @returns {Promise<*>} Whatever the selected strategy implementation resolves to.
 * @throws {Error} "Invalid strategy type" when the type is neither "IMAGE" nor "HTML".
 */
async function extractPropertiesUsingGPTFromArray(input) {
  switch (input.strategy.type) {
    case "IMAGE":
      return extractPropertiesWithImageStrategy(input);
    case "HTML":
      return extractPropertiesWithHTMLStrategy(input);
    default:
      throw new Error("Invalid strategy type");
  }
}
23
/**
 * Performs a single AI extraction call for one list item (image strategy) or
 * one batch of list items (HTML strategy).
 *
 * @param {object} params
 * @param {string} params.itemEntityName - Entity name forwarded to the AI call and the examples prompt.
 * @param {object} params.itemEntitySchema - JSON schema the AI result must follow.
 * @param {string} [params.text] - Text content to extract from.
 * @param {*} [params.image] - Image payload, sent as a single "png" image when present.
 * @param {*} [params.extraRowDataFromTableRow] - Table-derived JSON for the row(s); when
 *   present it is appended as an extra user message and the system prompt mentions it.
 * @param {object} params.strategy - Only `strategy.model` is read here.
 * @param {string[]} [params.tableHeaders] - Ordered table headers, appended as an extra user message.
 * @param {*} params.identifier - Forwarded unchanged to the AI call.
 * @param {boolean} [params.possibleTableHeaderOrFooter] - When truthy, the content is first
 *   checked with `isTableHeaderOrFooter`; a detected header short-circuits to `ok({})`.
 * @param {Array} [params.examples] - Previously extracted examples; a missing value is treated as none.
 * @param {string} [params.apiKey] - Forwarded unchanged to the AI call.
 * @returns {Promise<object>} neverthrow-style result: `ok(extracted)`, `ok({})` when a header
 *   is detected or the model reports "NoDataFound", otherwise `err(...)`.
 */
async function extractPropertiesUsingGPT({
  itemEntityName,
  itemEntitySchema,
  text,
  image,
  extraRowDataFromTableRow,
  strategy,
  tableHeaders,
  identifier,
  possibleTableHeaderOrFooter,
  examples,
  apiKey
}) {
  const { ok, err } = _neverthrow;

  const baseSystemMessage = `You are a data extraction assistant, out of this text, you will be asked to extract some data. Be accurate, give complete results. If you cannot find the field data, use null. make sure to follow examples if provided.`;
  const systemMessage = extraRowDataFromTableRow
    ? `${baseSystemMessage} The data will be provided twice, once as a json that was extracted from an html table and once as the html for that row in the table.`
    : baseSystemMessage;

  const extraUserMessages = [];

  // A missing `examples` argument is treated as "no examples" instead of crashing on `.length`.
  const exampleList = examples ?? [];
  if (exampleList.length > 0) {
    const { buildExamplesPrompt } = _buildExamplesPrompt;
    extraUserMessages.push(
      buildExamplesPrompt({
        entityName: itemEntityName,
        examples: exampleList
      })
    );
  }

  if (possibleTableHeaderOrFooter) {
    const { isTableHeaderOrFooter } = _isTableHeaderOrFooter;
    const headerCheck = await isTableHeaderOrFooter(text ?? image);
    if (headerCheck.isErr()) {
      return err(headerCheck.error);
    }
    if (headerCheck.value.isHeader) {
      // Header rows carry no entity data; report an empty record.
      return ok({});
    }
  }

  if (tableHeaders) {
    extraUserMessages.push(`this data are part of a table, the table headers in order are: ${tableHeaders.join(", ")}`);
  }
  if (extraRowDataFromTableRow) {
    extraUserMessages.push(JSON.stringify(extraRowDataFromTableRow));
  }

  const { extractStructuredDataUsingAi } = _common;
  const extractionResult = await extractStructuredDataUsingAi({
    entityName: itemEntityName,
    model: strategy.model,
    text: text ? [text] : undefined,
    jsonSchema: itemEntitySchema,
    systemMessage,
    extraUserMessages,
    images: image
      ? [
          {
            data: image,
            image_type: "png"
          }
        ]
      : [],
    identifier,
    apiKey
  });

  if (extractionResult.isErr()) {
    const { type, context } = extractionResult.error;
    if (type === "NoDataFound") {
      return ok({});
    }
    if (type === "InsufficientAiCredits") {
      // Keep the credit failure distinguishable, matching extractStructuredListUsingAi,
      // instead of reporting it as an invalid extraction result.
      return err(_errors.insufficientAiCredits(context));
    }
    return err(_errors.invalidExtractionResult(context));
  }
  return ok(extractionResult.value.result);
}
83
/**
 * Image strategy: extracts one entity per item by sending each item's image to
 * `extractPropertiesUsingGPT`. Items are processed in chunks; the calls inside
 * a chunk run in parallel, and chunks run one after another so results of
 * earlier chunks can be offered as examples to later ones.
 *
 * @param {object} params
 * @param {*} params.identifier - Forwarded to every extraction call.
 * @param {string} params.itemEntityName - Entity name forwarded to every extraction call.
 * @param {object} params.itemEntitySchema - JSON schema for a single item.
 * @param {Array<{type: string, buffer?: *}>} params.items - List items; only items with
 *   `type === "image"` contribute an image.
 * @param {object} params.strategy - Forwarded to every extraction call.
 * @param {Array} [params.tableAsJsonArray] - Row data; used only when its length equals `items.length`.
 * @param {string[]} [params.tableHeaders] - Forwarded to every extraction call.
 * @param {Array} [params.examples] - Examples from earlier runs; a missing value is treated as none.
 * @param {string} [params.apiKey] - Forwarded to every extraction call.
 * @returns {Promise<object>} `ok(results)` in item order, or the first `err` found in a chunk.
 */
async function extractPropertiesWithImageStrategy({
  identifier,
  itemEntityName,
  itemEntitySchema,
  items,
  strategy,
  tableAsJsonArray,
  tableHeaders,
  examples: previousExamples,
  apiKey
}) {
  const { ok, err } = _neverthrow;
  const { getRandomItems } = _utils;

  // Spreading an undefined `examples` would throw; treat it as an empty list.
  const seedExamples = previousExamples ?? [];
  const shouldUseTableData = !!tableAsJsonArray && tableAsJsonArray.length === items.length;
  const CHUNK_SIZE = process.env.TEST === "true" ? 1 : 5;
  const lastItemIndex = items.length - 1;

  const result = [];
  for (let chunkStart = 0; chunkStart < items.length; chunkStart += CHUNK_SIZE) {
    const chunk = items.slice(chunkStart, chunkStart + CHUNK_SIZE);
    // Examples are drawn once per chunk from everything extracted so far.
    const examples = getRandomItems([...result, ...seedExamples], 3);

    const chunkResults = await Promise.all(
      chunk.map((item, offset) => {
        const index = chunkStart + offset;
        return extractPropertiesUsingGPT({
          itemEntityName,
          itemEntitySchema,
          text: undefined,
          image: item.type === "image" ? item.buffer : undefined,
          extraRowDataFromTableRow: shouldUseTableData ? tableAsJsonArray[index] : undefined,
          strategy,
          tableHeaders,
          identifier,
          // Only the first and last items are candidates for a header/footer row.
          possibleTableHeaderOrFooter: index === 0 || index === lastItemIndex,
          examples,
          apiKey
        });
      })
    );

    const failure = chunkResults.find(r => r.isErr());
    if (failure) {
      return err(failure.error);
    }

    chunkResults.forEach((chunkResult, offset) => {
      const extracted = chunkResult.value;
      _Logger.logger.debug(`Extracted this info from array item #${chunkStart + offset}: ${JSON.stringify(extracted)}`);
      result.push(extracted);
    });
  }
  return ok(result);
}
137
/**
 * HTML strategy: extracts one entity per list item from the items' text.
 * Items are grouped into chunks; each chunk is sent as a single prompt in
 * which every item is wrapped in <LIST_ITEM_INDEX_n> tags, and the model is
 * asked for an array of `{ index, isTableHeaderOrFooter, item }` records.
 * The first chunk runs alone (so its results can serve as examples); the
 * remaining chunks run in parallel.
 *
 * @param {object} params
 * @param {*} params.identifier - Forwarded to every extraction call.
 * @param {string} params.itemEntityName - Entity name used in the prompt/schema description.
 * @param {object} params.itemEntitySchema - JSON schema for a single item.
 * @param {Array<{type: string, text?: string}>} params.items - List items; every item must
 *   have `type === "text"`.
 * @param {object} params.strategy - `strategy.model` selects chunk size and the Google workaround.
 * @param {Array} [params.tableAsJsonArray] - Row data; used only when its length equals `items.length`.
 * @param {string[]} [params.tableHeaders] - Forwarded to every extraction call.
 * @param {Array} [params.examples] - Examples from earlier runs; a missing value is treated as none.
 * @param {string} [params.apiKey] - Forwarded to every extraction call.
 * @returns {Promise<object>} `ok(results)` where `results[i]` is the entity for item `i`
 *   (`null` when the model reports no entity, `{}` for header/footer rows), or the first `err`.
 * @throws {Error} "Invalid type" (as a rejection) when an item is not of type "text".
 */
async function extractPropertiesWithHTMLStrategy({
  identifier,
  itemEntityName,
  itemEntitySchema,
  items,
  strategy,
  tableAsJsonArray,
  tableHeaders,
  examples: previousExamples,
  apiKey
}) {
  const { ok, err } = _neverthrow;
  const { getRandomItems } = _utils;
  const { compressStringSpaces } = _extractionHelpers;
  const { isGoogleModel } = _common;

  // Spreading an undefined `examples` would throw; treat it as an empty list.
  const seedExamples = previousExamples ?? [];
  const shouldUseTableData = !!tableAsJsonArray && tableAsJsonArray.length === items.length;

  // Weaker models get smaller batches when the items are long.
  const isWeakModel = strategy.model === "claude-3-haiku" || strategy.model === "gpt3.5-turbo";
  const totalTextLength = items.reduce(
    (sum, item) => (item.type === "text" ? sum + compressStringSpaces(item.text).length : sum),
    0
  );
  const averageItemLength = totalTextLength / items.length;
  const CHUNK_SIZE = isWeakModel && averageItemLength > 1000 ? 3 : 10;

  const itemsChunks = [];
  for (let chunkStart = 0; chunkStart < items.length; chunkStart += CHUNK_SIZE) {
    itemsChunks.push(
      items.slice(chunkStart, chunkStart + CHUNK_SIZE).map((item, offset) => ({
        index: chunkStart + offset,
        item
      }))
    );
  }

  // For Google models the nullable item is expressed as a `{ value, _isNull }`
  // wrapper instead of a `oneOf` with a null type.
  const shouldWorkaroundGoogleModel = isGoogleModel(strategy.model);
  const itemSchema = shouldWorkaroundGoogleModel
    ? {
        type: "object",
        properties: {
          value: itemEntitySchema,
          _isNull: {
            type: "boolean",
            description: "If the value is supposed to be null, set this to true and do not provide a value. Otherwise, set it to false and provide the value."
          }
        },
        required: ["_isNull"]
      }
    : {
        oneOf: [
          itemEntitySchema,
          {
            type: "null"
          }
        ]
      };
  const itemNullPromptExample = shouldWorkaroundGoogleModel ? `{ "_isNull": true }` : "null";

  // Converts the model's `item` payload into the entity (or null). The wrapper
  // is only unwrapped when it was actually requested; the decision is NOT based
  // on the payload's shape, so `{ "_isNull": true }` without a `value` becomes
  // null and entities that happen to own a `value` property are left intact.
  const unwrapItem = item => {
    if (!shouldWorkaroundGoogleModel || item == null || typeof item !== "object") {
      return item;
    }
    return item._isNull ? null : item.value ?? null;
  };

  const results = [];

  // Runs one chunk; resolves to an `err` result on failure and to undefined on success.
  const executeChunk = async chunk => {
    const examples = getRandomItems([...results, ...seedExamples], 3);
    // The wrapping tag must match the <LIST_ITEM_INDEX_X> tag named in the schema descriptions below.
    const texts = chunk
      .map(({ index, item }) => {
        if (item.type !== "text") {
          throw new Error("Invalid type");
        }
        return `<LIST_ITEM_INDEX_${index}>\n${item.text}\n</LIST_ITEM_INDEX_${index}>`;
      })
      .join("\n");
    const extraRowDataFromTableRow = shouldUseTableData ? chunk.map(c => tableAsJsonArray[c.index]) : undefined;
    const firstIndex = chunk[0].index;
    const lastIndex = chunk[chunk.length - 1].index;

    const result = await extractPropertiesUsingGPT({
      identifier,
      text: texts,
      examples,
      itemEntityName,
      tableHeaders,
      extraRowDataFromTableRow,
      apiKey,
      itemEntitySchema: {
        type: "array",
        description: `Extracted ` + itemEntityName + ` items from the content wrapped with <LIST_ITEM_INDEX_X> and </LIST_ITEM_INDEX_X> tags. each tag represents an item on the list, you should return ` + chunk.length + ` items, one for each index. If the item boundaries do not contain a valid entity, return null for that item with the right index. YOU SHOULD INCLUDE ALL ITEMS IN THE RESPONSE, EVERY INDEX BOUNDARY SHOULD BE REPRESENTED BY AN ITEM. IF the index boundary does not include a valid item you should return null for it.` + ` AND YOU SHOULD ALWAYS HAVE THE RIGHT INDEX FOR THE MISSING ITEMS, **do not put it at the end of the list or change the order of that item if in the list **. if <LIST_ITEM_INDEX_33> and </LIST_ITEM_INDEX_33> does not wrap a valid item, the item you return should be ` + `{ index: 33, isTableHeaderOrFooter: false, item: ${itemNullPromptExample} } where ths index is the number in the tag that's missing the item, ` + ` each <LIST_ITEM_INDEX_X> and </LIST_ITEM_INDEX_X> boundary represents a single list item. do not return the same list item more than once.`,
        minItems: chunk.length,
        maxItems: chunk.length,
        items: {
          type: "object",
          properties: {
            index: {
              type: "number",
              enum: generateRange(firstIndex, lastIndex),
              minimum: firstIndex,
              maximum: lastIndex,
              description: `Identify the index of an item from the list based on the prefix and suffix around the extracted data. if you extract data between <LIST_ITEM_INDEX_x> and </LIST_ITEM_INDEX_x>, the index should be the value of x. if <LIST_ITEM_INDEX_x> exists and does not have a valid data inside it you still should return an item with index x and value of null , getting the wrong index for the missing item will break the whole extraction. YOU SHOULD INCLUDE ALL ITEMS IN THE RESPONSE, EVERY INDEX BOUNDARY SHOULD BE REPRESENTED BY AN ITEM`
            },
            isTableHeaderOrFooter: {
              type: "boolean",
              description: `If the extracted data is a table header or footer, set this field to true. Otherwise, set it to false.`
            },
            item: itemSchema
          }
        }
      },
      image: undefined,
      strategy
    });
    if (result.isErr()) {
      return err(result.error);
    }

    (result.value ?? []).forEach(record => {
      if (record.isTableHeaderOrFooter) {
        _Logger.logger.debug(`skipping item at index #${record.index + 1}: ${JSON.stringify(record.item)}, it's detected as a table header or footer.`);
        results[record.index] = {};
        return;
      }
      const itemToUse = unwrapItem(record.item);
      results[record.index] = itemToUse;
      _Logger.logger.debug(`Extracted this info from array item #${record.index + 1}: ${JSON.stringify(itemToUse)}`);
    });
    return undefined;
  };

  const [firstChunk, ...otherChunks] = itemsChunks;
  if (firstChunk) {
    const firstChunkResult = await executeChunk(firstChunk);
    if (firstChunkResult && firstChunkResult.isErr()) {
      return err(firstChunkResult.error);
    }
  }

  const otherChunksResults = await Promise.all(otherChunks.map(chunk => executeChunk(chunk)));
  const otherChunksError = otherChunksResults.find(r => r && r.isErr());
  if (otherChunksError) {
    return err(otherChunksError.error);
  }
  return ok(results);
}
268
/**
 * Builds the inclusive list of values from `start` up to `end`, stepping by one.
 *
 * @param {number} start - First value of the range.
 * @param {number} end - Last value of the range (inclusive).
 * @returns {number[]} `[start, start + 1, ..., end]`.
 * @throws {Error} When `end` is smaller than `start`.
 */
function generateRange(start, end) {
  if (start > end) {
    throw new Error("End value must be greater than or equal to start value.");
  }
  const values = [];
  let current = start;
  while (current <= end) {
    values.push(current);
    current++;
  }
  return values;
}
@@ -0,0 +1,44 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.extractStructuredListUsingAi = extractStructuredListUsingAi;
7
+ var _neverthrow = require("neverthrow");
8
+ var _errors = require("../errors");
9
+ var _zod = require("zod");
10
+ var _common = require("../../common");
11
/**
 * Asks the AI for a list of `itemSchema` entities and validates that the
 * answer is an array of string-valued records.
 *
 * @param {string} entityName - Entity name forwarded to the AI call.
 * @param {object} itemSchema - JSON schema of a single list item.
 * @param {{text?: string, images?: Array}} data - Content to extract from; images are sent as "png".
 * @param {*} identifier - Forwarded unchanged to the AI call.
 * @param {object} strategy - Only `strategy.model` is read here.
 * @param {string} prompt - Used as the system message.
 * @param {string} [apiKey] - Forwarded unchanged to the AI call.
 * @returns {Promise<object>} `ok(records)`, `ok([])` when the model reports "NoDataFound",
 *   or `err(...)` for credit, extraction and validation failures.
 */
async function extractStructuredListUsingAi(entityName, itemSchema, data, identifier, strategy, prompt, apiKey) {
  const { ok, err } = _neverthrow;
  const { z } = _zod;

  const request = {
    entityName,
    model: strategy.model,
    jsonSchema: {
      type: "array",
      items: itemSchema
    },
    text: data.text ? [data.text] : undefined,
    // `images` stays undefined when the caller supplied none.
    images: data.images == null
      ? undefined
      : data.images.map(image => ({
          data: image,
          image_type: "png"
        })),
    identifier,
    systemMessage: prompt,
    apiKey
  };

  const { extractStructuredDataUsingAi } = _common;
  const result = await extractStructuredDataUsingAi(request);

  if (result.isErr()) {
    switch (result.error.type) {
      case "InsufficientAiCredits":
        return err(_errors.insufficientAiCredits(result.error.context));
      case "NoDataFound":
        return ok([]);
      default:
        return err(_errors.invalidExtractionResult(result.error.context));
    }
  }

  const recordListSchema = z.array(z.record(z.string()));
  const parsedResult = recordListSchema.safeParse(result.value.result);
  if (parsedResult.success) {
    return ok(parsedResult.data);
  }
  return err(_errors.invalidExtractionResult("Failed to parse extraction result."));
}
@@ -0,0 +1,94 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.findSetOfXpathsToCreateAnArrayExtractor = findSetOfXpathsToCreateAnArrayExtractor;
7
+ exports.getContainerElement = getContainerElement;
8
+ exports.getListContainerXpath = getListContainerXpath;
9
+ exports.partOfSameArrayXpath = partOfSameArrayXpath;
10
+ exports.verifyThatAllXpathsArePartOfSameArray = verifyThatAllXpathsArePartOfSameArray;
11
/**
 * Derives the XPath of the element that contains a list, given the DOM
 * matches found for each searched value.
 *
 * Exact matches are tried first; if they do not yield a usable set of
 * XPaths, all matches are tried.
 *
 * @param {Map<*, Array<{exact: boolean, nodeXpath: string}>>} matches - Matches per searched
 *   value (anything exposing `entries()` of `[key, matchArray]` pairs).
 * @returns {Promise<string|null>} The container XPath, or null when no consistent
 *   set of XPaths could be found.
 */
async function getListContainerXpath(matches) {
  const matchSets = Array.from(matches.entries(), ([, matchSet]) => matchSet);

  // One array of XPaths per searched value; values without any XPath are dropped.
  const collectXpathSets = exactOnly =>
    matchSets
      .map(matchSet => (exactOnly ? matchSet.filter(match => match.exact) : matchSet))
      .map(matchSet => matchSet.map(match => match.nodeXpath))
      .filter(xpaths => xpaths.length > 0);

  const resultXpaths =
    findSetOfXpathsToCreateAnArrayExtractor(collectXpathSets(true)) ||
    findSetOfXpathsToCreateAnArrayExtractor(collectXpathSets(false));

  if (!resultXpaths) {
    return null;
  }
  if (!verifyThatAllXpathsArePartOfSameArray(resultXpaths)) {
    return null;
  }
  return getContainerElement(resultXpaths);
}
30
/**
 * Picks one XPath from every input set such that each picked XPath belongs to
 * the same array as the anchor XPath taken from the smallest set.
 *
 * Anchors are tried from the longest XPath string to the shortest. For each
 * anchor, the first matching XPath of every other set is taken; the first
 * anchor that finds a match in all other sets wins.
 *
 * The input arrays are not modified.
 *
 * @param {string[][]} input - One array of candidate XPaths per searched value.
 * @returns {string[]|null} The anchor followed by one match per other set (in input
 *   order), or null when `input` is empty or no anchor matches every set.
 */
function findSetOfXpathsToCreateAnArrayExtractor(input) {
  if (input.length === 0) return null;

  const smallestSet = input.reduce((acc, curr) => (curr.length < acc.length ? curr : acc));
  // Sort a copy: sorting `smallestSet` itself would reorder the caller's array.
  const anchors = [...smallestSet].sort((a, b) => b.length - a.length);
  const otherSets = input.filter(set => set !== smallestSet);

  for (const xpath of anchors) {
    const result = [xpath];
    for (const set of otherSets) {
      const matched = set.find(otherXpath => partOfSameArrayXpath(xpath, otherXpath));
      if (!matched) break;
      result.push(matched);
    }
    if (result.length === input.length) return result;
  }
  return null;
}
49
/**
 * Tells whether two XPaths point at sibling entries of the same array, i.e.
 * they are identical except for exactly one number in exactly one path segment.
 *
 * Besides the numeric comparison, the non-numeric text of a differing segment
 * must be identical, so `div[1]` and `span[2]` are NOT treated as siblings.
 *
 * @param {string} str1 - First XPath.
 * @param {string} str2 - Second XPath.
 * @returns {boolean} True only when the paths have the same number of segments
 *   and differ by a single number; identical paths return false.
 */
function partOfSameArrayXpath(str1, str2) {
  if (str1 === str2) return false;

  const parts1 = str1.split("/");
  const parts2 = str2.split("/");
  if (parts1.length !== parts2.length) return false;

  let differingSegments = 0;
  for (let i = 0; i < parts1.length; i++) {
    const segment1 = parts1[i];
    const segment2 = parts2[i];
    if (segment1 === segment2) continue;

    // Everything around the numbers (tag name, brackets, ...) has to match.
    const text1 = segment1.split(/\d+/);
    const text2 = segment2.split(/\d+/);
    if (text1.length !== text2.length || text1.some((piece, k) => piece !== text2[k])) {
      return false;
    }

    const numbers1 = (segment1.match(/\d+/g) || []).map(Number);
    const numbers2 = (segment2.match(/\d+/g) || []).map(Number);
    if (numbers1.length !== numbers2.length) return false;

    const mismatches = numbers1.filter((value, k) => value !== numbers2[k]).length;
    // Exactly one number may differ inside a differing segment.
    if (mismatches !== 1) return false;

    differingSegments += 1;
    if (differingSegments > 1) return false;
  }
  return differingSegments === 1;
}
73
/**
 * Checks that every XPath after the first one belongs to the same array as
 * the first XPath, according to `partOfSameArrayXpath`.
 *
 * @param {string[]} xpaths - XPaths to compare against the first entry.
 * @returns {boolean} True when all comparisons succeed; also true for lists
 *   with fewer than two entries, since nothing is compared.
 */
function verifyThatAllXpathsArePartOfSameArray(xpaths) {
  const [firstPath, ...remainingPaths] = xpaths;
  return remainingPaths.every(candidate => partOfSameArrayXpath(candidate, firstPath));
}
82
/**
 * Computes the longest common leading path of the given XPaths, compared
 * segment by segment (segments are separated by "/").
 *
 * @param {string[]} xpaths - XPaths whose shared ancestor path is wanted.
 * @returns {string|null} The shared path, or null when the list is empty or the
 *   paths share nothing beyond the leading "/" (previously an empty string was
 *   returned in that case because the empty root segment counted as a match).
 */
function getContainerElement(xpaths) {
  if (!xpaths.length) return null;

  let commonPrefix = xpaths[0].split("/");
  for (let i = 1; i < xpaths.length; i++) {
    const parts = xpaths[i].split("/");
    let shared = 0;
    while (shared < commonPrefix.length && shared < parts.length && commonPrefix[shared] === parts[shared]) {
      shared++;
    }
    commonPrefix = commonPrefix.slice(0, shared);
    // Nothing left in common: no later path can extend the prefix again.
    if (shared === 0) break;
  }

  const containerPath = commonPrefix.join("/");
  // An absolute XPath starts with an empty segment, so "only the root matches"
  // joins to "" — that is not a usable container path.
  return containerPath === "" ? null : containerPath;
}
@@ -0,0 +1,20 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.getRelativeContainerXpathSelector = getRelativeContainerXpathSelector;
7
+ var _locatorHelpers = require("../../../common/locatorHelpers");
8
/**
 * Expresses a container XPath relative to the XPath of a search-region locator.
 *
 * @param {object} searchRegionLocator - Locator passed to `findXPathForLocator`.
 * @param {string} containerPath - XPath of the container element.
 * @returns {Promise<string|null>} `"."` when both XPaths are equal; `null` when
 *   either XPath is missing; otherwise `containerPath` with the first
 *   occurrence of `<searchRegionXpath>/` removed.
 */
async function getRelativeContainerXpathSelector(searchRegionLocator, containerPath) {
  const { findXPathForLocator } = _locatorHelpers;
  const regionXpath = await findXPathForLocator(searchRegionLocator);
  if (!regionXpath) return null;
  if (regionXpath === containerPath) return ".";
  return containerPath ? containerPath.replace(`${regionXpath}/`, "") : null;
}
@@ -0,0 +1,21 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.getSimplifiedHtmlPerListItem = getSimplifiedHtmlPerListItem;
7
+ var _getSimplifiedHtml = require("../../objectExtractionHelpers/getSimplifiedHtml");
8
/**
 * Produces the simplified HTML of each list item, one item at a time and in
 * the order given.
 *
 * @param {Array<object>} itemsLocators - Locators exposing `elementHandle()`.
 * @returns {Promise<Array>} The `getSimplifiedHtml` result for every item.
 */
async function getSimplifiedHtmlPerListItem(itemsLocators) {
  const { getSimplifiedHtml } = _getSimplifiedHtml;
  const htmlPerItem = [];
  // Items are processed sequentially on purpose; each handle is resolved
  // only after the previous item has been simplified.
  for (const itemLocator of itemsLocators) {
    const itemHandle = await itemLocator.elementHandle();
    const simplified = await getSimplifiedHtml(itemHandle, {
      keepOnlyVisibleElements: false,
      shouldIncludeContentAsProp: true,
      shouldIncludeOnClick: true
    });
    htmlPerItem.push(simplified);
  }
  return htmlPerItem;
}
@@ -0,0 +1,48 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.createJsonFromTable = createJsonFromTable;
7
+ exports.isListTable = isListTable;
8
/**
 * Determines whether a list container sits inside a `<table>` and whether
 * every item's simplified HTML contains a `<tr`.
 *
 * @param {object} containerLocator - Locator of the list container; its nearest
 *   `table` ancestor is looked up via `xpath=ancestor::table[1]`.
 * @param {string[]} itemsSimplifiedHtml - Simplified HTML of each list item.
 * @returns {Promise<{tableLocater: *, isTable: boolean}>} `tableLocater` is the
 *   element handle of the ancestor table (or `undefined` when there is none);
 *   `isTable` is `true` only when that handle exists and all items contain `<tr`.
 */
async function isListTable(containerLocator, itemsSimplifiedHtml) {
  const tableAncestor = containerLocator.locator("xpath=ancestor::table[1]");
  const ancestorMatches = await tableAncestor.all();
  if (ancestorMatches.length === 0) {
    return { tableLocater: undefined, isTable: false };
  }
  const tableHandle = await tableAncestor.elementHandle({ timeout: 1000 });
  const everyItemHasRow = itemsSimplifiedHtml.every(html => html.includes("<tr"));
  return {
    tableLocater: tableHandle,
    isTable: Boolean(tableHandle) && everyItemHasRow
  };
}
26
/**
 * Converts the first `<table>` in the page into an array of row objects.
 *
 * Runs inside the page via `page.evaluate`. Header names come from every `th`
 * in the table (whitespace collapsed and trimmed); every `tr` except the first
 * becomes one object whose values are the raw `textContent` of the `td` at the
 * same index, or `""` when that cell is missing. Empty headers are skipped.
 *
 * @param {object} page - Object exposing `evaluate(fn)`.
 * @returns {Promise<Array<Object<string, string>>>} One object per data row,
 *   or an empty array when the page has no table.
 */
async function createJsonFromTable(page) {
  return await page.evaluate(() => {
    const collapseWhitespace = text => text.replace(/\s+/g, " ").trim();
    const firstTable = document.querySelector("table");
    if (!firstTable) {
      return [];
    }
    const headers = Array.from(firstTable.querySelectorAll("th"), th => {
      const rawHeader = th?.textContent;
      return rawHeader && collapseWhitespace(rawHeader);
    });
    // The first row is assumed to be the header row and is dropped.
    const dataRows = Array.from(firstTable.querySelectorAll("tr")).slice(1);
    return dataRows.map(row => {
      const cells = Array.from(row.querySelectorAll("td"));
      const record = {};
      headers.forEach((header, column) => {
        if (header) record[header] = cells[column]?.textContent ?? "";
      });
      return record;
    });
  });
}
@@ -0,0 +1,52 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.validateDynamicListExtractorOptions = validateDynamicListExtractorOptions;
7
+ var _neverthrow = require("neverthrow");
8
+ var Errors = _interopRequireWildcard(require("../errors"));
9
+ var _locatorHelpers = require("../../../common/locatorHelpers");
10
/**
 * Babel interop helper for `import * as ns` over a CommonJS `require` result.
 *
 * Returns ES-module objects unchanged (unless `t` is truthy); otherwise builds
 * a prototype-less namespace object whose `default` is the module itself and
 * which copies the module's own enumerable properties other than `default`
 * (accessors are copied as accessors). Object/function modules are cached per
 * mode in WeakMaps so repeated calls return the same namespace object.
 *
 * On first call the helper replaces itself with the inner implementation so
 * the caches are created only once.
 *
 * @param {*} e - The required module value.
 * @param {boolean} [t] - When truthy, wraps even `__esModule` objects and uses
 *   a separate cache.
 * @returns {*} The module itself or its namespace wrapper.
 */
function _interopRequireWildcard(e, t) {
  let defaultCache;
  let forcedCache;
  if (typeof WeakMap === "function") {
    defaultCache = new WeakMap();
    forcedCache = new WeakMap();
  }
  _interopRequireWildcard = function (mod, forceWrap) {
    if (!forceWrap && mod && mod.__esModule) return mod;
    const namespace = { __proto__: null, default: mod };
    if (mod === null || (typeof mod !== "object" && typeof mod !== "function")) return namespace;
    const cache = forceWrap ? forcedCache : defaultCache;
    if (cache) {
      if (cache.has(mod)) return cache.get(mod);
      cache.set(mod, namespace);
    }
    for (const key in mod) {
      if (key === "default" || !{}.hasOwnProperty.call(mod, key)) continue;
      const descriptor = Object.getOwnPropertyDescriptor(mod, key);
      if (descriptor && (descriptor.get || descriptor.set)) {
        Object.defineProperty(namespace, key, descriptor);
      } else {
        namespace[key] = mod[key];
      }
    }
    return namespace;
  };
  return _interopRequireWildcard(e, t);
}
11
/**
 * Validates the inputs of the dynamic list extractor and normalizes them into
 * a single options object wrapped in a neverthrow result.
 *
 * @param {object} page - Page exposing `locator(selector)` and `url()`.
 * @param {string} label - Identifier of the extraction; must be a string.
 * @param {object} options - Caller options (`searchRegion`, `variantKey`,
 *   `optionalPropertiesInvalidator`, `itemEntityName`, `itemEntitySchema`,
 *   `strategy`, `prompt`, `apiKey`).
 * @returns {Promise<object>} `err(...)` when the label is not a string, the
 *   search region matches nothing / has no element handle, or the page origin
 *   cannot be derived; otherwise `ok(...)` with the normalized options.
 */
async function validateDynamicListExtractorOptions(page, label, options) {
  const { err, ok, fromThrowable } = _neverthrow;
  const { getLocatorInternalKey, findXPathForLocator } = _locatorHelpers;

  if (typeof label !== "string") {
    return err(Errors.invalidInput("Identifier must be a string."));
  }

  // Without an explicit region the whole document is searched.
  const searchRegion = options.searchRegion ?? page.locator("html");
  // A rejected count is treated the same as "no matching element".
  const matchCount = await searchRegion.count().catch(() => 0);
  if (!(matchCount > 0)) {
    return err(Errors.invalidSearchRegion());
  }
  const searchRegionHandler = await searchRegion.elementHandle();
  if (!searchRegionHandler) {
    return err(Errors.invalidSearchRegion());
  }

  const pageUrl = page.url();
  const resolvePageOrigin = fromThrowable(
    () => new URL(pageUrl).origin,
    () => Errors.invalidAddressUrl("Cannot get page url origin.")
  );
  // The page origin is only computed when no variant key was supplied.
  const variantKey = options.variantKey ? ok(options.variantKey) : resolvePageOrigin();
  if (variantKey.isErr()) {
    return err(variantKey.error);
  }

  const invalidate = options.optionalPropertiesInvalidator ?? (() => []);
  const primaryProperty = Object.entries(options.itemEntitySchema.properties).find(
    ([, propertySchema]) => propertySchema.primary
  );

  return ok({
    itemEntityName: options.itemEntityName,
    itemEntitySchema: options.itemEntitySchema,
    variantKey: variantKey.value ?? "about:blank",
    invalidate,
    pageUrl,
    primaryProperty,
    searchRegionHandler,
    searchRegion,
    hasSearchRegionContainer: Boolean(options.searchRegion),
    label,
    searchRegionKey: options.searchRegion ? getLocatorInternalKey(options.searchRegion) : null,
    strategy: options.strategy,
    prompt: options.prompt,
    searchRegionXpath: options.searchRegion ? await findXPathForLocator(options.searchRegion) : undefined,
    apiKey: options.apiKey
  });
}
@@ -0,0 +1,23 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.createAnthropicInstance = createAnthropicInstance;
7
+ var _dotenv = require("dotenv");
8
+ var _jwtTokenManager = require("../../common/jwtTokenManager");
9
+ var _sdk = _interopRequireDefault(require("@anthropic-ai/sdk"));
10
/**
 * Babel interop helper for default imports: ES-module objects are returned
 * unchanged, anything else is wrapped as `{ default: value }`.
 *
 * @param {*} e - The required module value.
 * @returns {*} `e` itself when it is truthy and flagged `__esModule`,
 *   otherwise a wrapper object exposing `e` as `default`.
 */
function _interopRequireDefault(e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
11
+ (0, _dotenv.config)();
12
/**
 * Builds an Anthropic SDK client.
 *
 * With a truthy `options.apiKey` the client is created with that key only.
 * Otherwise the client is pointed at a backend functions URL assembled from
 * the `FUNCTIONS_DOMAIN`, `INTUNED_WORKSPACE_ID` and `INTUNED_INTEGRATION_ID`
 * environment variables, uses a placeholder API key, and sends requests
 * through `backendFunctionsTokenManager.fetchWithToken`.
 *
 * @param {{apiKey?: string}|null} [options] - Optional client settings.
 * @returns {object} A new instance of the SDK's default export.
 */
function createAnthropicInstance(options) {
  const Anthropic = _sdk.default;
  if (options?.apiKey) {
    return new Anthropic({ apiKey: options.apiKey });
  }
  const tokenManager = _jwtTokenManager.backendFunctionsTokenManager;
  const { FUNCTIONS_DOMAIN, INTUNED_WORKSPACE_ID, INTUNED_INTEGRATION_ID } = process.env;
  return new Anthropic({
    apiKey: "--THI_VALUE_WILL_BE_REPLACED_BY_INTUNED_BE--",
    baseURL: `${FUNCTIONS_DOMAIN}/api/${INTUNED_WORKSPACE_ID}/functions/${INTUNED_INTEGRATION_ID}/anthropic`,
    // Bound so the token manager keeps its own `this` when the SDK calls fetch.
    fetch: tokenManager.fetchWithToken.bind(tokenManager)
  });
}
@@ -0,0 +1,23 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.createOpenAIInstance = createOpenAIInstance;
7
+ var _openai = require("openai");
8
+ var _dotenv = require("dotenv");
9
+ var _jwtTokenManager = require("../../common/jwtTokenManager");
10
+ (0, _dotenv.config)();
11
/**
 * Builds an OpenAI SDK client.
 *
 * With a truthy `options.apiKey` the client is created with that key only.
 * Otherwise the client is pointed at a backend functions URL assembled from
 * the `FUNCTIONS_DOMAIN`, `INTUNED_WORKSPACE_ID` and `INTUNED_INTEGRATION_ID`
 * environment variables, uses an empty API key, and sends requests through
 * `backendFunctionsTokenManager.fetchWithToken`.
 *
 * @param {{apiKey?: string}|null} [options] - Optional client settings.
 * @returns {object} A new `OpenAI` client instance.
 */
function createOpenAIInstance(options) {
  const { OpenAI } = _openai;
  if (options?.apiKey) {
    return new OpenAI({ apiKey: options.apiKey });
  }
  const tokenManager = _jwtTokenManager.backendFunctionsTokenManager;
  const { FUNCTIONS_DOMAIN, INTUNED_WORKSPACE_ID, INTUNED_INTEGRATION_ID } = process.env;
  return new OpenAI({
    apiKey: "",
    baseURL: `${FUNCTIONS_DOMAIN}/api/${INTUNED_WORKSPACE_ID}/functions/${INTUNED_INTEGRATION_ID}/openai`,
    // Bound so the token manager keeps its own `this` when the SDK calls fetch.
    fetch: tokenManager.fetchWithToken.bind(tokenManager)
  });
}