@intuned/browser-dev 2.2.3-test-build.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. package/.babelrc +21 -0
  2. package/.eslintignore +10 -0
  3. package/.eslintrc.js +39 -0
  4. package/LICENSE +43 -0
  5. package/dist/ai/export.d.js +5 -0
  6. package/dist/ai/export.d.ts +641 -0
  7. package/dist/ai/extractStructuredData.js +320 -0
  8. package/dist/ai/extractStructuredDataUsingAi.js +139 -0
  9. package/dist/ai/extractionHelpers/screenshotHelpers.js +56 -0
  10. package/dist/ai/extractionHelpers/validateSchema.js +148 -0
  11. package/dist/ai/index.d.ts +641 -0
  12. package/dist/ai/index.js +19 -0
  13. package/dist/ai/isPageLoaded.js +77 -0
  14. package/dist/ai/prompt.js +39 -0
  15. package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +137 -0
  16. package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
  17. package/dist/ai/tests/testExtractStructuredData.spec.js +646 -0
  18. package/dist/ai/tests/testIsPageLoaded.spec.js +277 -0
  19. package/dist/ai/tools/index.js +48 -0
  20. package/dist/ai/types/errors.js +67 -0
  21. package/dist/ai/types/models.js +45 -0
  22. package/dist/ai/types/types.js +48 -0
  23. package/dist/ai/validators.js +167 -0
  24. package/dist/common/Logger/index.js +60 -0
  25. package/dist/common/Logger/types.js +5 -0
  26. package/dist/common/SdkError.js +50 -0
  27. package/dist/common/aiModelsValidations.js +32 -0
  28. package/dist/common/browser_scripts.js +2596 -0
  29. package/dist/common/ensureBrowserScripts.js +18 -0
  30. package/dist/common/extendedTest.js +148 -0
  31. package/dist/common/extractionHelpers.js +19 -0
  32. package/dist/common/formatZodError.js +18 -0
  33. package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
  34. package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
  35. package/dist/common/fuzzySearch/utils.js +23 -0
  36. package/dist/common/getModelProvider.js +18 -0
  37. package/dist/common/getSimplifiedHtml.js +122 -0
  38. package/dist/common/hashObject.js +32 -0
  39. package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
  40. package/dist/common/html2markdown/index.js +19 -0
  41. package/dist/common/jwtTokenManager.js +18 -0
  42. package/dist/common/loadRuntime.js +16 -0
  43. package/dist/common/locatorHelpers.js +41 -0
  44. package/dist/common/matching/collectStrings.js +32 -0
  45. package/dist/common/matching/levenshtein.js +40 -0
  46. package/dist/common/matching/matching.js +317 -0
  47. package/dist/common/matching/types.js +1 -0
  48. package/dist/common/noEmpty.js +9 -0
  49. package/dist/common/saveSnapshotWithExamples.js +60 -0
  50. package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
  51. package/dist/common/xpathMapping.js +107 -0
  52. package/dist/helpers/clickUntilExhausted.js +85 -0
  53. package/dist/helpers/downloadFile.js +125 -0
  54. package/dist/helpers/export.d.js +5 -0
  55. package/dist/helpers/export.d.ts +1220 -0
  56. package/dist/helpers/extractMarkdown.js +35 -0
  57. package/dist/helpers/filterEmptyValues.js +54 -0
  58. package/dist/helpers/gotoUrl.js +98 -0
  59. package/dist/helpers/index.d.ts +1220 -0
  60. package/dist/helpers/index.js +128 -0
  61. package/dist/helpers/processDate.js +25 -0
  62. package/dist/helpers/resolveUrl.js +64 -0
  63. package/dist/helpers/sanitizeHtml.js +74 -0
  64. package/dist/helpers/saveFileToS3.js +50 -0
  65. package/dist/helpers/scrollToLoadContent.js +57 -0
  66. package/dist/helpers/tests/extendedTest.js +130 -0
  67. package/dist/helpers/tests/testClickUntilExhausted.spec.js +387 -0
  68. package/dist/helpers/tests/testDownloadFile.spec.js +204 -0
  69. package/dist/helpers/tests/testExtractMarkdown.spec.js +290 -0
  70. package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
  71. package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
  72. package/dist/helpers/tests/testProcessDate.spec.js +13 -0
  73. package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
  74. package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
  75. package/dist/helpers/tests/testScrollToLoadContent.spec.js +163 -0
  76. package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +342 -0
  77. package/dist/helpers/tests/testWithDomSettledWait.spec.js +164 -0
  78. package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +114 -0
  79. package/dist/helpers/types/Attachment.js +115 -0
  80. package/dist/helpers/types/CustomTypeRegistry.js +48 -0
  81. package/dist/helpers/types/RunEnvironment.js +18 -0
  82. package/dist/helpers/types/ValidationError.js +17 -0
  83. package/dist/helpers/types/index.js +51 -0
  84. package/dist/helpers/uploadFileToS3.js +154 -0
  85. package/dist/helpers/utils/getS3Client.js +22 -0
  86. package/dist/helpers/utils/index.js +73 -0
  87. package/dist/helpers/utils/isDownload.js +10 -0
  88. package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
  89. package/dist/helpers/utils/isLocator.js +9 -0
  90. package/dist/helpers/utils/jwtTokenManager.js +18 -0
  91. package/dist/helpers/validateDataUsingSchema.js +103 -0
  92. package/dist/helpers/waitForDomSettled.js +90 -0
  93. package/dist/helpers/withNetworkSettledWait.js +91 -0
  94. package/dist/index.d.js +16 -0
  95. package/dist/index.d.ts +10 -0
  96. package/dist/index.js +16 -0
  97. package/dist/intunedServices/ApiGateway/aiApiGateway.js +99 -0
  98. package/dist/intunedServices/ApiGateway/factory.js +13 -0
  99. package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
  100. package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
  101. package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
  102. package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +224 -0
  103. package/dist/intunedServices/ApiGateway/types.js +11 -0
  104. package/dist/intunedServices/cache/cache.js +61 -0
  105. package/dist/intunedServices/cache/index.js +12 -0
  106. package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
  107. package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
  108. package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
  109. package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +135 -0
  110. package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
  111. package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +132 -0
  112. package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
  113. package/dist/optimized-extractors/common/findTableHeaders.js +162 -0
  114. package/dist/optimized-extractors/common/index.js +55 -0
  115. package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +84 -0
  116. package/dist/optimized-extractors/common/matching/matching.js +212 -0
  117. package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
  118. package/dist/optimized-extractors/common/matching/types.js +18 -0
  119. package/dist/optimized-extractors/common/matching/utils.js +184 -0
  120. package/dist/optimized-extractors/common/utils.js +58 -0
  121. package/dist/optimized-extractors/export.d.js +5 -0
  122. package/dist/optimized-extractors/export.d.ts +397 -0
  123. package/dist/optimized-extractors/extractArray.js +120 -0
  124. package/dist/optimized-extractors/extractObject.js +104 -0
  125. package/dist/optimized-extractors/index.d.ts +397 -0
  126. package/dist/optimized-extractors/index.js +31 -0
  127. package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +312 -0
  128. package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
  129. package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
  130. package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
  131. package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
  132. package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +152 -0
  133. package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
  134. package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
  135. package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +240 -0
  136. package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
  137. package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
  138. package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
  139. package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
  140. package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
  141. package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
  142. package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
  143. package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
  144. package/dist/optimized-extractors/models/anthropicModel.js +23 -0
  145. package/dist/optimized-extractors/models/openaiModel.js +23 -0
  146. package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
  147. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
  148. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
  149. package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
  150. package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
  151. package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
  152. package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
  153. package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
  154. package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
  155. package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
  156. package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
  157. package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
  158. package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
  159. package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
  160. package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
  161. package/dist/optimized-extractors/types/errors.js +42 -0
  162. package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
  163. package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
  164. package/dist/optimized-extractors/types/types.js +5 -0
  165. package/dist/optimized-extractors/validators.js +152 -0
  166. package/dist/vite-env.d.js +1 -0
  167. package/dist/vite-env.d.ts +9 -0
  168. package/docs.md +14 -0
  169. package/generated-docs/ai/functions/extractStructuredData.mdx +255 -0
  170. package/generated-docs/ai/functions/isPageLoaded.mdx +88 -0
  171. package/generated-docs/ai/interfaces/ArraySchema.mdx +36 -0
  172. package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
  173. package/generated-docs/ai/interfaces/BooleanSchema.mdx +28 -0
  174. package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
  175. package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
  176. package/generated-docs/ai/interfaces/NumberSchema.mdx +35 -0
  177. package/generated-docs/ai/interfaces/ObjectSchema.mdx +39 -0
  178. package/generated-docs/ai/interfaces/StringSchema.mdx +35 -0
  179. package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
  180. package/generated-docs/ai/type-aliases/ContentItem.mdx +12 -0
  181. package/generated-docs/ai/type-aliases/JsonSchema.mdx +47 -0
  182. package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +85 -0
  183. package/generated-docs/helpers/functions/downloadFile.mdx +99 -0
  184. package/generated-docs/helpers/functions/extractMarkdown.mdx +56 -0
  185. package/generated-docs/helpers/functions/filterEmptyValues.mdx +51 -0
  186. package/generated-docs/helpers/functions/goToUrl.mdx +124 -0
  187. package/generated-docs/helpers/functions/processDate.mdx +55 -0
  188. package/generated-docs/helpers/functions/resolveUrl.mdx +165 -0
  189. package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
  190. package/generated-docs/helpers/functions/saveFileToS3.mdx +127 -0
  191. package/generated-docs/helpers/functions/scrollToLoadContent.mdx +89 -0
  192. package/generated-docs/helpers/functions/uploadFileToS3.mdx +121 -0
  193. package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +90 -0
  194. package/generated-docs/helpers/functions/waitForDomSettled.mdx +91 -0
  195. package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +76 -0
  196. package/generated-docs/helpers/interfaces/Attachment.mdx +56 -0
  197. package/generated-docs/helpers/interfaces/S3Configs.mdx +52 -0
  198. package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
  199. package/generated-docs/helpers/type-aliases/AttachmentType.mdx +10 -0
  200. package/generated-docs/helpers/type-aliases/FileType.mdx +61 -0
  201. package/generated-docs/helpers/type-aliases/Trigger.mdx +62 -0
  202. package/how-to-run-tests.md +10 -0
  203. package/intuned-runtime-setup.md +13 -0
  204. package/package.json +119 -0
  205. package/tsconfig.eslint.json +5 -0
  206. package/tsconfig.json +26 -0
@@ -0,0 +1,122 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.extractStructuredDataUsingAiInstance = extractStructuredDataUsingAiInstance;
7
+ var _neverthrow = require("neverthrow");
8
+ var Errors = _interopRequireWildcard(require("../types/errors"));
9
+ var _utils = require("./utils");
10
+ var _Logger = require("../../common/Logger");
11
+ var _ai = require("ai");
12
// Module-interop helper (looks like the standard Babel-emitted one — confirm against the build config).
// - Returns `e` unchanged when `t` is falsy and `e.__esModule` is set.
// - Otherwise builds a null-prototype namespace object `f` with `default: e`; for null or
//   non-object/non-function `e` that object is returned as-is.
// - For objects/functions, copies every own enumerable key except "default" onto `f`,
//   re-defining accessor properties (get/set) via their descriptor instead of reading them.
// - When WeakMap exists, results are cached per input in one of two WeakMaps selected by `t`.
// - On first call the function reassigns itself to the inner implementation.
+ function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
13
/**
 * Runs one tool-calling extraction through the `ai` SDK's `generateText` and
 * returns the outcome as a neverthrow `Result`.
 *
 * The model is forced to call a tool (`toolChoice: "required"`): either
 * `extract_<entityName>` with the extracted payload, or `no_data_found`.
 *
 * @param input Options object. Fields read here: `entityName`, `model`,
 *   `jsonSchema`, `systemMessage`, `text` (array of strings),
 *   `extraUserMessages` (array of strings), `images` (items with `data` and
 *   `image_type`), `apiKey`, `apiName`, `logAiCallCost`, `identifier`.
 *   NOTE(review): `input.maxTokens` is accepted by callers but has never been
 *   forwarded to `generateText`; it stays unforwarded here to preserve
 *   behaviour — confirm whether it should map to the SDK's output-token limit.
 * @returns `ok({ result })` with the cleaned-up extraction, or `err(...)`
 *   carrying one of the `Errors.*` values (AI call failure, insufficient
 *   credits, no data found, no tool usage, invalid extraction result).
 */
async function extractStructuredDataUsingAiInstance(input) {
  const {
    entityName,
    model,
    jsonSchema: originalJsonSchema,
    systemMessage,
    text,
    extraUserMessages,
    images,
    apiKey,
    apiName
  } = input;
  const processedJsonSchema = (0, _utils.processInputSchema)(originalJsonSchema, entityName);

  // User message parts, in order: extra messages, text chunks, then images.
  const content = [];
  if (extraUserMessages) {
    content.push(...extraUserMessages.map(message => ({
      type: "text",
      text: message
    })));
  }
  if (text) {
    content.push(...text.map(t => ({
      type: "text",
      text: t
    })));
  }
  if (images) {
    content.push(...images.map(image => ({
      type: "image",
      image: image.data,
      mimeType: image.image_type
    })));
  }

  const toolName = `extract_${entityName}`;
  // The prompt previously read "..., using the . Here is more info..." — the
  // tool name was missing from the sentence. Name the tool explicitly.
  const entityInfo = originalJsonSchema.description
    ? `. Here is more info about the entity that we are trying to extract: ${originalJsonSchema.description}`
    : "";
  const systemPrompt = `${systemMessage ?? ""}, using the ${toolName} tool${entityInfo}`;

  const apiResult = await (0, _neverthrow.fromPromise)((0, _ai.generateText)({
    model,
    temperature: 0,
    messages: [{
      role: "system",
      content: systemPrompt
    }, {
      role: "user",
      content
    }],
    toolChoice: "required",
    tools: {
      [toolName]: (0, _ai.tool)({
        description: `Extract ${entityName} mentioned in the text and images. Rely on the parameters for more info.`,
        inputSchema: (0, _ai.jsonSchema)(processedJsonSchema),
        type: "function"
      }),
      no_data_found: {
        description: `you should call this tool you are asked to extract data using ${toolName} and you couldn't find any data, make this your last resort, if you are sure that there is no data in the text or images`,
        type: "function",
        inputSchema: (0, _ai.jsonSchema)({
          type: "object",
          properties: {}
        })
      }
    }
  }), error => {
    // Must never throw: an exception here would reject the awaited result
    // instead of producing an Err, so tolerate non-Error rejection values.
    const message = error?.message ?? String(error);
    if (error instanceof _ai.APICallError) {
      const parsedBody = (0, _neverthrow.fromThrowable)(JSON.parse, () => null)(error.responseBody ?? "");
      const responseBody = parsedBody.isOk() ? parsedBody.value : null;
      if (error.statusCode === 449) {
        return Errors.insufficientAiCredits(responseBody?.error);
      }
      return Errors.AiCallFailed(`Failed to call ${apiName} api with status ${error.statusCode}: ${message}`, error);
    }
    return Errors.AiCallFailed(`Failed to call ${apiName} api: ${message}`, error);
  });
  if (apiResult.isErr()) {
    return (0, _neverthrow.err)(apiResult.error);
  }

  const generation = apiResult.value;
  if (generation.finishReason === "length") {
    return (0, _neverthrow.err)(Errors.AiCallFailed("response from ai exceeds model maximum output tokens, try to be more specific with what data you need to extract"));
  }

  // `no_data_found` wins over any extraction call made in the same response.
  const noDataFound = generation.toolCalls?.some(call => call.toolName === "no_data_found");
  if (noDataFound) {
    return (0, _neverthrow.err)(Errors.NoDataFound("data isn't found in the text or images."));
  }
  const toolResult = generation.toolCalls?.find(call => call.toolName === toolName);
  if (!toolResult) {
    return (0, _neverthrow.err)(Errors.NoToolUsage("the model was not able to extract data correctly."));
  }
  const extractedData = toolResult.input;
  if (!extractedData) {
    return (0, _neverthrow.err)(Errors.invalidExtractionResult("No extraction result found."));
  }

  const result = (0, _utils.getResultFromOutputSchema)(originalJsonSchema, entityName, extractedData);
  const formatted = (0, _utils.cleanupAiResult)(result);

  // Cost is read from the "x-ai-cost-in-cents" response header when present.
  const callCost = generation.response.headers?.["x-ai-cost-in-cents"];
  if (input.logAiCallCost) {
    if (apiKey) {
      _Logger.logger.info(`extractor ${input.identifier}: AI cost is not calculated (using custom API key)`);
    } else if (callCost) {
      const cost = parseFloat(callCost);
      if (!Number.isNaN(cost)) {
        _Logger.logger.info(`extractor ${input.identifier}: AI cost is $${cost / 100}`);
      }
    }
  }
  return (0, _neverthrow.ok)({
    result: formatted
  });
}
@@ -0,0 +1,162 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.getTableHeadersUsingAi = getTableHeadersUsingAi;
7
+ var _anthropicModel = require("../models/anthropicModel");
8
+ var _zod = require("zod");
9
+ var _imageSize = require("image-size");
10
+ var _neverthrow = require("neverthrow");
11
+ var Errors = _interopRequireWildcard(require("../types/errors"));
12
// Module-interop helper (looks like the standard Babel-emitted one — confirm against the build config).
// - Returns `e` unchanged when `t` is falsy and `e.__esModule` is set.
// - Otherwise builds a null-prototype namespace object `f` with `default: e`; for null or
//   non-object/non-function `e` that object is returned as-is.
// - For objects/functions, copies every own enumerable key except "default" onto `f`,
//   re-defining accessor properties (get/set) via their descriptor instead of reading them.
// - When WeakMap exists, results are cached per input in one of two WeakMaps selected by `t`.
// - On first call the function reassigns itself to the inner implementation.
+ function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
13
/**
 * Screenshots `handle` and asks the `claude-3-haiku-20240307` model (via the
 * `get_table_headers` tool) to read the table's column headers from the image.
 *
 * This is best-effort: a failed screenshot, unreadable image size, failed
 * crop, failed API call, missing tool call or malformed tool input all yield
 * `ok({ headers: [] })`. The only error surfaced is insufficient AI credits
 * (HTTP status 449).
 *
 * @param handle Object exposing `screenshot(options)` and `evaluate(fn, arg)`
 *   (presumably a Playwright ElementHandle or Locator — confirm against callers).
 * @param identifier Not used by this function; kept for call-site compatibility.
 * @returns neverthrow `Result`: `ok({ headers: string[] })` in the order the
 *   model returned them, or `err(...)` for insufficient AI credits.
 */
async function getTableHeadersUsingAi(handle, identifier) {
  const noHeaders = () => (0, _neverthrow.ok)({
    headers: []
  });

  let image;
  try {
    image = await handle.screenshot({
      type: "png",
      timeout: 3_000
    });
  } catch (e) {
    // Screenshot failures are deliberately treated as "no headers found".
    return noHeaders();
  }

  const {
    width,
    height
  } = (0, _imageSize.imageSize)(image);
  if (!width) return noHeaders();

  // Only the top 500px are sent when the screenshot is taller than that.
  const imageToUse = height && height > 500 ? await cropImage(image, handle, {
    height: 500,
    width,
    x: 0,
    y: 0
  }) : image;
  if (!imageToUse) return noHeaders();

  const anthropic = (0, _anthropicModel.createAnthropicInstance)();
  const response = await (0, _neverthrow.fromPromise)(anthropic.messages.create({
    max_tokens: 4096,
    temperature: 0,
    messages: [{
      role: "user",
      content: [{
        type: "text",
        text: `extract table headers from the image, you must use the get_table_headers tool, make sure to get the headers in order, "column_number" is the number of the table column that has this header. if there's a column with no header that still counts as a column and should be taken into account.`
      }, {
        type: "image",
        source: {
          data: imageToUse.toString("base64"),
          media_type: "image/png",
          type: "base64"
        }
      }]
    }],
    model: "claude-3-haiku-20240307",
    tools: [{
      input_schema: {
        type: "object",
        properties: {
          headers: {
            type: "array",
            items: {
              type: "object",
              description: "Table headers",
              properties: {
                column_number: {
                  type: "number",
                  description: "The column number of the header"
                },
                header: {
                  type: "string"
                }
              },
              required: ["column_number", "header"]
            }
          }
        },
        required: ["headers"]
      },
      name: "get_table_headers",
      description: `a function to extract table headers from an image, the headers should be in order and the column number should be the number of the column that has this header. if there's a column with no header that still counts as a column and should be taken into account. you should still need to`
    }]
  }), error => {
    // Must never throw: an exception here would reject the awaited result and
    // break the best-effort contract, so every property access is guarded.
    if (error?.status === 449) {
      return Errors.insufficientAiCredits(`🔴 ${error.error?.error}`);
    }
    // Any other failure maps to `undefined` and is downgraded to "no headers" below.
    return undefined;
  });

  if (response.isErr()) {
    if (response.error?.type === "InsufficientAiCredits") {
      return (0, _neverthrow.err)(response.error);
    }
    return noHeaders();
  }

  const tool = response.value.content.find(part => part.type === "tool_use");
  if (!tool) return noHeaders();

  const headersParsingResults = _zod.z.object({
    headers: _zod.z.array(_zod.z.object({
      column_number: _zod.z.number(),
      header: _zod.z.string()
    }))
  }).safeParse(tool.input);
  if (!headersParsingResults.success) {
    return noHeaders();
  }
  return (0, _neverthrow.ok)({
    headers: headersParsingResults.data.headers.map(item => item.header)
  });
}
120
/**
 * Crops a PNG image by drawing it onto a canvas inside the page that owns
 * `handle`, giving up after 5 seconds.
 *
 * @param image Buffer holding the PNG bytes to crop.
 * @param handle Object exposing `evaluate(fn, arg)`; the function runs in the
 *   page and receives the element as its first (unused) argument.
 * @param region `{ height, width, x, y }` crop rectangle in image pixels.
 * @returns Buffer with the cropped PNG bytes, or `undefined` when the page did
 *   not produce a result in time or the image could not be loaded/drawn.
 *   A rejection from `handle.evaluate` itself still propagates to the caller.
 */
async function cropImage(image, handle, {
  height: cropHeight,
  width: cropWidth,
  x,
  y
}) {
  // The input is a PNG screenshot, so label the data URL accordingly
  // (it was previously mislabelled as image/jpeg).
  const dataUrl = `data:image/png;base64,${image.toString("base64")}`;

  let timeoutId;
  const timedOut = new Promise(resolve => {
    timeoutId = setTimeout(() => resolve(undefined), 5000);
  });

  // Everything inside this callback executes in the browser, so it must be
  // self-contained and may only use the serialised argument object.
  const croppedInPage = handle.evaluate((_, {
    cropHeight,
    cropWidth,
    x,
    y,
    url
  }) => {
    return new Promise(resolve => {
      const img = document.createElement("img");
      const canvas = document.createElement("canvas");
      const cleanup = () => {
        img.remove();
        canvas.remove();
      };
      img.onload = () => {
        try {
          canvas.width = cropWidth;
          canvas.height = cropHeight;
          const ctx = canvas.getContext("2d");
          ctx.drawImage(img, x, y, cropWidth, cropHeight, 0, 0, cropWidth, cropHeight);
          // canvas.toDataURL() defaults to PNG; keep only the base64 payload.
          resolve(canvas.toDataURL().split(";base64,").pop());
        } catch (drawError) {
          // e.g. no 2d context available: report "no result" instead of hanging.
          resolve(undefined);
        } finally {
          cleanup();
        }
      };
      // Without this, a failed decode left the <img> in the page and the
      // caller waiting for the full timeout.
      img.onerror = () => {
        cleanup();
        resolve(undefined);
      };
      document.body.appendChild(img);
      img.src = url;
    });
  }, {
    url: dataUrl,
    x,
    y,
    cropWidth,
    cropHeight
  });

  try {
    const croppedBase64 = await Promise.race([croppedInPage, timedOut]);
    if (!croppedBase64) return undefined;
    return Buffer.from(croppedBase64, "base64");
  } finally {
    // Do not leave a pending 5s timer behind once the race is decided.
    clearTimeout(timeoutId);
  }
}
@@ -0,0 +1,55 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.extractStructuredDataUsingAi = extractStructuredDataUsingAi;
7
+ exports.isClaudeModel = isClaudeModel;
8
+ exports.isGoogleModel = isGoogleModel;
9
+ exports.isOpenAiModel = isOpenAiModel;
10
+ var _neverthrow = require("neverthrow");
11
+ var _extractStructuredDataUsingClaude = require("./extractStructuredDataUsingClaude");
12
+ var _extractStructuredDataUsingOpenAi = require("./extractStructuredDataUsingOpenAi");
13
+ var _utils = require("./utils");
14
+ var _aiModelsValidations = require("../../common/aiModelsValidations");
15
+ var _extractStructuredDataUsingGoogle = require("./extractStructuredDataUsingGoogle");
16
/**
 * Tells whether `model` appears in the shared `CLAUDE_MODELS` list.
 *
 * @param model Model identifier to look up.
 * @returns `true` when the list includes `model`, otherwise `false`.
 */
function isClaudeModel(model) {
  const knownClaudeModels = _aiModelsValidations.CLAUDE_MODELS;
  return knownClaudeModels.includes(model);
}
19
/**
 * Tells whether `model` appears in the shared `GOOGLE_MODELS` list.
 *
 * @param model Model identifier to look up.
 * @returns `true` when the list includes `model`, otherwise `false`.
 */
function isGoogleModel(model) {
  const knownGoogleModels = _aiModelsValidations.GOOGLE_MODELS;
  return knownGoogleModels.includes(model);
}
22
/**
 * Tells whether `model` appears in the shared `GPT_MODELS` list.
 *
 * @param model Model identifier to look up.
 * @returns `true` when the list includes `model`, otherwise `false`.
 */
function isOpenAiModel(model) {
  const knownGptModels = _aiModelsValidations.GPT_MODELS;
  return knownGptModels.includes(model);
}
25
/**
 * Dispatches a structured-data extraction to the provider-specific extractor
 * chosen from `input.model`, then cleans up the returned result.
 *
 * Selection order: OpenAI models first, then Google models; every other model
 * is handled by the Claude extractor.
 *
 * @param input Extraction options; forwarded to the chosen extractor as a
 *   shallow copy with `model` set explicitly.
 * @returns neverthrow `Result`: the extractor's error unchanged, or
 *   `ok({ result })` with the result passed through `cleanupAiResult`.
 */
async function extractStructuredDataUsingAi(input) {
  const model = input.model;

  // Pick the extractor first, then invoke it once with the same payload shape.
  let runExtraction;
  if (isOpenAiModel(model)) {
    runExtraction = _extractStructuredDataUsingOpenAi.extractStructuredDataUsingOpenAi;
  } else if (isGoogleModel(model)) {
    runExtraction = _extractStructuredDataUsingGoogle.extractStructuredDataUsingGoogle;
  } else {
    runExtraction = _extractStructuredDataUsingClaude.extractStructuredDataUsingClaude;
  }

  const extractionResult = await runExtraction({
    ...input,
    model
  });
  if (extractionResult.isErr()) {
    return (0, _neverthrow.err)(extractionResult.error);
  }

  const cleaned = (0, _utils.cleanupAiResult)(extractionResult.value.result);
  return (0, _neverthrow.ok)({
    result: cleaned
  });
}
@@ -0,0 +1,84 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.isTableHeaderOrFooter = isTableHeaderOrFooter;
7
+ var _anthropicModel = require("../models/anthropicModel");
8
+ var _zod = require("zod");
9
+ var _neverthrow = require("neverthrow");
10
+ var Errors = _interopRequireWildcard(require("../types/errors"));
11
// Module-interop helper (looks like the standard Babel-emitted one — confirm against the build config).
// - Returns `e` unchanged when `t` is falsy and `e.__esModule` is set.
// - Otherwise builds a null-prototype namespace object `f` with `default: e`; for null or
//   non-object/non-function `e` that object is returned as-is.
// - For objects/functions, copies every own enumerable key except "default" onto `f`,
//   re-defining accessor properties (get/set) via their descriptor instead of reading them.
// - When WeakMap exists, results are cached per input in one of two WeakMaps selected by `t`.
// - On first call the function reassigns itself to the inner implementation.
+ function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
12
/**
 * Asks the `claude-3-haiku-20240307` model (via the `is_table_header_or_footer`
 * tool) whether `content` is a table header or footer.
 *
 * This is best-effort: empty content, a failed API call, a missing tool call
 * or malformed tool input all yield `ok({ isHeader: false })`. The only error
 * surfaced is insufficient AI credits (HTTP status 449).
 *
 * @param content Either a string (sent as text) or a value whose
 *   `toString("base64")` yields PNG image data (presumably a Buffer — confirm
 *   against callers).
 * @returns neverthrow `Result`: `ok({ isHeader: boolean })`, or `err(...)`
 *   for insufficient AI credits.
 */
async function isTableHeaderOrFooter(content) {
  const notHeader = () => (0, _neverthrow.ok)({
    isHeader: false
  });
  if (!content) {
    return notHeader();
  }

  const anthropic = (0, _anthropicModel.createAnthropicInstance)();
  const itemContent = typeof content === "string" ? {
    type: "text",
    text: content
  } : {
    type: "image",
    source: {
      data: content.toString("base64"),
      media_type: "image/png",
      type: "base64"
    }
  };

  const response = await (0, _neverthrow.fromPromise)(anthropic.messages.create({
    max_tokens: 4096,
    temperature: 0,
    messages: [{
      role: "user",
      content: [{
        type: "text",
        // Two-line prompt; the line break is part of the text sent to the model.
        text: `given the following content, you need to decide if the content is a html table header or a table footer,\na table header is a row that contains labels for table columns, and footer usually has pagination information or summary of the table`
      }, itemContent]
    }],
    model: "claude-3-haiku-20240307",
    tools: [{
      input_schema: {
        type: "object",
        properties: {
          isTableHeaderOrFooter: {
            type: "boolean"
          }
        },
        required: ["isTableHeaderOrFooter"]
      },
      name: "is_table_header_or_footer",
      description: `given a text or image content, decide if the content is a table header or footer or not.`
    }]
  }), error => {
    // Must never throw: an exception here would reject the awaited result and
    // break the best-effort contract, so every property access is guarded.
    if (error?.status === 449) {
      return Errors.insufficientAiCredits(`🔴 ${error.error?.error}`);
    }
    // Any other failure maps to `undefined` and is downgraded to `isHeader: false` below.
    return undefined;
  });

  if (response.isErr()) {
    if (response.error?.type === "InsufficientAiCredits") {
      return (0, _neverthrow.err)(response.error);
    }
    return notHeader();
  }

  const tool = response.value.content.find(part => part.type === "tool_use");
  if (!tool) return notHeader();

  const headersParsingResults = _zod.z.object({
    isTableHeaderOrFooter: _zod.z.boolean()
  }).safeParse(tool.input);
  if (!headersParsingResults.success) {
    return notHeader();
  }
  return (0, _neverthrow.ok)({
    isHeader: headersParsingResults.data.isTableHeaderOrFooter
  });
}
@@ -0,0 +1,212 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.MatchSource = exports.MatchMode = void 0;
7
+ exports.filterAndRankMatches = filterAndRankMatches;
8
+ exports.matchStringsWithDomContent = matchStringsWithDomContent;
9
+ exports.matchStringsWithDomContentInBrowser = matchStringsWithDomContentInBrowser;
10
+ exports.normalizeSpacing = normalizeSpacing;
11
+ exports.rankMatch = rankMatch;
12
+ exports.removePunctuationAndSpaces = removePunctuationAndSpaces;
13
+ exports.replaceWithBestMatches = replaceWithBestMatches;
14
+ exports.selectBestMatch = selectBestMatch;
15
+ var _levenshtein = require("../../../common/matching/levenshtein");
16
+ var _ensureBrowserScripts = require("../../../common/ensureBrowserScripts");
17
/**
 * How a search string was matched against DOM content.
 * The values are the lowercase strings compared against a match's
 * `match_mode` field elsewhere in this module.
 */
const MatchMode = exports.MatchMode = {
  FULL: "full",
  PARTIAL: "partial",
  FUZZY: "fuzzy"
};
23
/**
 * Where in the DOM a matched value was found.
 * The values are the lowercase strings looked up from a match's
 * `match_source` field when matches are ranked.
 */
const MatchSource = exports.MatchSource = {
  ATTRIBUTE: "attribute",
  TEXT_CONTENT: "text_content",
  DIRECT_TEXT_NODE: "direct_text_node"
};
29
/**
 * Collapses every run of whitespace (including newlines and tabs) into a
 * single space and removes leading/trailing whitespace.
 *
 * @param {string} text - Text to normalize.
 * @returns {string} The text with single-space separation and no outer padding.
 */
function normalizeSpacing(text) {
  const singleLine = text.replace(/\n/g, " ").replace(/\t/g, " ");
  const words = singleLine.split(/\s+/);
  return words.join(" ").trim();
}
34
/**
 * Strips ASCII punctuation and all whitespace from a string.
 *
 * @param {string} s - Input text.
 * @returns {string} The input with punctuation and whitespace characters removed.
 */
function removePunctuationAndSpaces(s) {
  const punctuationOrWhitespace = /[!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~\s]/g;
  return s.replace(punctuationOrWhitespace, "");
}
37
/**
 * Rates how well a candidate string matches the original string.
 *
 * Both strings are whitespace-normalized and lowercased first. The result is
 * "HIGH" when the original is longer than 20 characters and the Levenshtein
 * ratio exceeds 0.85, or when the two strings are identical once punctuation
 * and whitespace are removed; otherwise "LOW". If anything throws along the
 * way, a warning is logged and only the punctuation-insensitive equality
 * check is used.
 *
 * @param {string} original - The string that was searched for.
 * @param {string} match - The candidate value found.
 * @returns {"HIGH"|"LOW"} Confidence rating for the candidate.
 */
function rankMatch(original, match) {
  const toRank = isHigh => isHigh ? "HIGH" : "LOW";
  try {
    const expected = normalizeSpacing(original).toLowerCase();
    const candidate = normalizeSpacing(match).toLowerCase();
    // The ratio is always computed, even for short strings, so that a failure
    // in the Levenshtein helper consistently routes through the fallback path.
    const similarity = (0, _levenshtein.levenshteinRatio)(expected, candidate);
    const isLongAndSimilar = expected.length > 20 && similarity > 0.85;
    if (isLongAndSimilar) {
      return "HIGH";
    }
    return toRank(removePunctuationAndSpaces(expected) === removePunctuationAndSpaces(candidate));
  } catch (error) {
    console.warn("Error in rankMatch, falling back to simple comparison", error);
    const expectedBare = removePunctuationAndSpaces(normalizeSpacing(original).toLowerCase());
    const candidateBare = removePunctuationAndSpaces(normalizeSpacing(match).toLowerCase());
    return toRank(expectedBare === candidateBare);
  }
}
62
/**
 * Picks the value to use for `original` out of a list of match records.
 *
 * Any non-fuzzy match wins outright (the first one in list order). Otherwise
 * the fuzzy matches rated "HIGH" by `rankMatch` are considered and the one
 * with the smallest `fuzzy_distance` is chosen; a missing distance is treated
 * as Infinity.
 *
 * @param {string} original - The string that was searched for.
 * @param {Array<object>} matches - Match records carrying `match_mode`,
 *   `matched_value` and optionally `fuzzy_distance`.
 * @returns {string|null} The selected `matched_value`, or null when nothing qualifies.
 */
function selectBestMatch(original, matches) {
  const nonFuzzy = [];
  const fuzzy = [];
  matches.forEach(candidate => {
    const bucket = candidate.match_mode === MatchMode.FUZZY ? fuzzy : nonFuzzy;
    bucket.push(candidate);
  });
  if (nonFuzzy.length > 0) {
    return nonFuzzy[0].matched_value;
  }

  const highConfidence = [];
  for (const candidate of fuzzy) {
    if (rankMatch(original, candidate.matched_value) === "HIGH") {
      highConfidence.push(candidate);
    }
  }
  if (highConfidence.length === 0) {
    return null;
  }

  const distanceOf = candidate => candidate.fuzzy_distance ?? Infinity;
  highConfidence.sort((left, right) => distanceOf(left) - distanceOf(right));
  return highConfidence[0].matched_value;
}
84
/**
 * Looks up each search string in the page's DOM using the injected
 * `window.__INTUNED__.matchStringsWithDomContent` browser script.
 *
 * This function never rejects for lookup failures: if the browser scripts
 * cannot be ensured, the in-page function is unavailable, or the evaluation
 * throws, every search string is mapped to an empty list instead.
 *
 * @param {object} pageObject - Object exposing `locator()` and `evaluate()`
 *   (used here like a Playwright Page).
 * @param {string[]} stringsList - Strings to search for.
 * @param {object} [container] - Optional element handle to search within.
 *   When omitted, the document's `<html>` element is used.
 * @returns {Promise<Object<string, Array>>} Map from each search string to its matches.
 */
async function matchStringsWithDomContent(pageObject, stringsList, container) {
  // Handle created by this function (as opposed to one supplied by the
  // caller); only this one is ours to dispose.
  let ownedHandle = null;
  try {
    await (0, _ensureBrowserScripts.ensureBrowserScripts)(pageObject);
    let handle;
    if (container) {
      handle = container;
    } else {
      ownedHandle = await pageObject.locator("html").elementHandle();
      handle = ownedHandle;
    }
    // The callback runs in the page, so it must be self-contained and cannot
    // reference anything from this module's scope.
    return await pageObject.evaluate(async ([root, searchTexts]) => {
      const noMatches = () => searchTexts.reduce((acc, text) => {
        acc[text] = [];
        return acc;
      }, {});
      try {
        if (typeof window.__INTUNED__ !== "undefined" && typeof window.__INTUNED__.matchStringsWithDomContent === "function") {
          return await window.__INTUNED__.matchStringsWithDomContent(root, searchTexts);
        }
        return noMatches();
      } catch (error) {
        console.error("Error matching strings with DOM content:", error);
        return noMatches();
      }
    }, [handle, stringsList]);
  } catch (e) {
    console.warn("Error matching strings with DOM content:", e);
    return stringsList.reduce((acc, string) => {
      acc[string] = [];
      return acc;
    }, {});
  } finally {
    // An undisposed element handle keeps its DOM node alive; release the one
    // we created. Disposal is best-effort and must not replace the result.
    if (ownedHandle) {
      try {
        await ownedHandle.dispose();
      } catch {
        // Ignored on purpose: the page or frame may already be gone.
      }
    }
  }
}
120
/**
 * Maps each search string to the best value found for it in the DOM.
 *
 * A string with no matches at all maps to itself. A string that has matches
 * maps to whatever `selectBestMatch` returns for it, which can be null.
 *
 * @param {string[]} stringsToMatch - Strings to look up.
 * @param {object} pageObject - Forwarded to `matchStringsWithDomContent`.
 * @param {object} [container] - Forwarded to `matchStringsWithDomContent`.
 * @returns {Promise<Object<string, string|null>>} Map from search string to its replacement.
 */
async function replaceWithBestMatches(stringsToMatch, pageObject, container) {
  const matchesByString = await matchStringsWithDomContent(pageObject, stringsToMatch, container);
  const replacementPairs = [];
  for (const [searched, candidates] of Object.entries(matchesByString)) {
    const replacement = candidates.length > 0 ? selectBestMatch(searched, candidates) : searched;
    replacementPairs.push([searched, replacement]);
  }
  return Object.fromEntries(replacementPairs);
}
124
/**
 * Filters, ranks and de-duplicates DOM match records.
 *
 * Steps:
 *  1. Drop matches whose xpath contains "[name()='svg']/", "/path" or "/style".
 *  2. Check visibility of every remaining match concurrently; a failed check
 *     counts as not visible.
 *  3. Sort descending by a score that sums: match mode weight
 *     (full 3, partial 2, fuzzy 1), match source weight (direct_text_node 3,
 *     text_content 2, attribute 1), 1 if visible, and — for partial matches
 *     with both values present — 1 when the source value and matched value
 *     have equal length, else 1 / (length difference).
 *  4. Keep only the first (highest ranked) match per xpath.
 *
 * @param {object} frame - Object exposing `locator()` (used here like a
 *   Playwright Page/Frame).
 * @param {Array<object>} matches - Match records with `xpath`, `match_mode`,
 *   `match_source` and optionally `matched_source_value` / `matched_value`.
 * @returns {Promise<Array<object>>} Ranked matches, unique by xpath.
 */
async function filterAndRankMatches(frame, matches) {
  const excludedXpathParts = ["[name()='svg']/", "/path", "/style"];
  const modeWeight = {
    full: 3,
    partial: 2,
    fuzzy: 1
  };
  const sourceWeight = {
    direct_text_node: 3,
    text_content: 2,
    attribute: 1
  };

  const candidates = matches.filter(({
    xpath
  }) => !excludedXpathParts.some(part => xpath.includes(part)));

  const checkVisible = async candidate => {
    try {
      return await frame.locator(`xpath=${candidate.xpath}`).isVisible({
        timeout: 100
      });
    } catch {
      return false;
    }
  };
  const visibility = await Promise.all(candidates.map(candidate => checkVisible(candidate)));

  const score = ({
    match,
    visible
  }) => {
    const mode = match.match_mode.toLowerCase();
    let total = (modeWeight[mode] || 0) + (sourceWeight[match.match_source.toLowerCase()] || 0) + (visible ? 1 : 0);
    if (mode === "partial" && match.matched_source_value && match.matched_value) {
      // Fewer surplus characters in the source value means a tighter partial match.
      const surplus = match.matched_source_value.length - match.matched_value.length;
      total += surplus === 0 ? 1 : 1 / surplus;
    }
    return total;
  };

  const ranked = candidates.map((match, index) => ({
    match,
    visible: visibility[index]
  }));
  ranked.sort((left, right) => score(right) - score(left));

  const seenXpaths = new Set();
  return ranked.map(entry => entry.match).filter(match => {
    if (seenXpaths.has(match.xpath)) {
      return false;
    }
    seenXpaths.add(match.xpath);
    return true;
  });
}
179
/**
 * Searches for the given strings in a page or frame and, recursively, in all
 * of its descendant frames, merging the matches per search string.
 *
 * The lookup for `frame` itself is not guarded: if the in-page
 * `window.__INTUNED__.matchStringsWithDomContent` call fails, this function
 * rejects. Failures inside descendant frames are skipped so that one broken
 * frame does not discard the results of the others.
 *
 * @param {object} frame - Object exposing `locator()` and `evaluate()`, plus
 *   either `childFrames()` (Frame-like) or `frames()` / `mainFrame()` (Page-like).
 * @param {string[]} stringsList - Strings to search for.
 * @param {object} [container] - Optional locator-like object whose
 *   `elementHandle()` yields the element to search within. When omitted, the
 *   `<html>` element is used. Descendant frames are always searched in full.
 * @returns {Promise<Object<string, Array>>} Map from search string to its matches.
 */
async function matchStringsWithDomContentInBrowser(frame, stringsList, container) {
  const handle = container ? await container.elementHandle() : await frame.locator("html").elementHandle();
  console.info(`Searching for ${stringsList.length} strings in the DOM:`, stringsList);
  let matches;
  try {
    matches = await frame.evaluate(async ([root, searchTexts]) => {
      const result = await window.__INTUNED__.matchStringsWithDomContent(root, searchTexts);
      return result;
    }, [handle, stringsList]);
  } finally {
    // The handle is always created here, so it is ours to release; an
    // undisposed element handle keeps its DOM node alive.
    if (handle) {
      try {
        await handle.dispose();
      } catch {
        // Best-effort: the frame may already be detached.
      }
    }
  }

  let frames = [];
  if ("childFrames" in frame) {
    frames = frame.childFrames();
  } else if (typeof frame.mainFrame === "function") {
    // Page-like object: frames() lists the main frame and every descendant,
    // so recursing over it would rescan the main frame and visit nested
    // frames repeatedly. Start from the main frame's direct children instead;
    // the recursion below reaches the deeper levels exactly once.
    frames = frame.mainFrame().childFrames();
  } else if ("frames" in frame) {
    frames = frame.frames();
  }

  for (const subframe of frames) {
    try {
      const frameMatches = await matchStringsWithDomContentInBrowser(subframe, stringsList, null);
      for (const [string, stringMatches] of Object.entries(frameMatches)) {
        // Own-property check: `in` would also hit inherited members such as
        // "constructor" or "toString" when those are search strings.
        if (Object.prototype.hasOwnProperty.call(matches, string)) {
          matches[string].push(...stringMatches);
        } else {
          matches[string] = stringMatches;
        }
      }
    } catch {
      // Deliberately skipped: a frame that cannot be searched must not
      // prevent the remaining frames from contributing their matches.
      continue;
    }
  }
  return matches;
}