@intuned/browser-dev 2.2.3-unify-sdks.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc +21 -0
- package/.eslintignore +10 -0
- package/.eslintrc.js +39 -0
- package/LICENSE +43 -0
- package/dist/ai-extractors/AnthropicClient/index.js +23 -0
- package/dist/ai-extractors/export.d.js +5 -0
- package/dist/ai-extractors/export.d.ts +422 -0
- package/dist/ai-extractors/extractStructuredData.js +79 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/constants.js +7 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/errors.js +42 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingClaude.js +149 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingGoogle.js +37 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingOpenAi.js +144 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStrucutredDataUsingAiInstance.js +123 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/index.js +55 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/isItemTableHeaderOrFooter.js +96 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/screenshotHelpers.js +55 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/types.js +5 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/utils.js +53 -0
- package/dist/ai-extractors/extractionHelpers/types.js +5 -0
- package/dist/ai-extractors/fileExtractors.js +176 -0
- package/dist/ai-extractors/index.js +31 -0
- package/dist/ai-extractors/jsonSchema.d.js +5 -0
- package/dist/ai-extractors/jsonSchema.d.ts +49 -0
- package/dist/ai-extractors/openAiClients/index.js +23 -0
- package/dist/ai-extractors/validators.js +239 -0
- package/dist/browser/ai/export.d.js +3 -0
- package/dist/browser/ai/export.d.ts +587 -0
- package/dist/browser/ai/extractMarkdown.js +15 -0
- package/dist/browser/ai/extractStructuredData.js +231 -0
- package/dist/browser/ai/extractStructuredDataUsingAi.js +140 -0
- package/dist/browser/ai/extractionHelpers/screenshotHelpers.js +55 -0
- package/dist/browser/ai/extractionHelpers/validateSchema.js +148 -0
- package/dist/browser/ai/index.d.ts +587 -0
- package/dist/browser/ai/index.js +19 -0
- package/dist/browser/ai/isPageLoaded.js +67 -0
- package/dist/browser/ai/prompt.js +39 -0
- package/dist/browser/ai/tests/testCheckAllTypesAreStrings.spec.js +143 -0
- package/dist/browser/ai/tests/testExtractStructuredData.spec.js +622 -0
- package/dist/browser/ai/tools/index.js +48 -0
- package/dist/browser/ai/types/errors.js +67 -0
- package/dist/browser/ai/types/models.js +45 -0
- package/dist/browser/ai/types/types.js +48 -0
- package/dist/browser/ai/validators.js +136 -0
- package/dist/common/Logger/index.js +60 -0
- package/dist/common/Logger/types.js +5 -0
- package/dist/common/SdkError.js +50 -0
- package/dist/common/aiModelsValidations.js +50 -0
- package/dist/common/browser_scripts.js +2596 -0
- package/dist/common/ensureBrowserScripts.js +17 -0
- package/dist/common/environmentVariables.js +16 -0
- package/dist/common/eventTracking/getAiTrackingHeaders.js +31 -0
- package/dist/common/eventTracking/getFileTrackingHeaders.js +23 -0
- package/dist/common/extendedTest.js +148 -0
- package/dist/common/extractionHelpers.js +19 -0
- package/dist/common/formatZodError.js +18 -0
- package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
- package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
- package/dist/common/fuzzySearch/utils.js +23 -0
- package/dist/common/getModelProvider.js +18 -0
- package/dist/common/getSimplifiedHtml.js +122 -0
- package/dist/common/hashObject.js +32 -0
- package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
- package/dist/common/html2markdown/index.js +19 -0
- package/dist/common/jwtTokenManager.js +18 -0
- package/dist/common/loadRuntime.js +16 -0
- package/dist/common/locatorHelpers.js +41 -0
- package/dist/common/matching/collectStrings.js +32 -0
- package/dist/common/matching/levenshtein.js +40 -0
- package/dist/common/matching/matching.js +317 -0
- package/dist/common/matching/types.js +1 -0
- package/dist/common/noEmpty.js +9 -0
- package/dist/common/saveSnapshotWithExamples.js +60 -0
- package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
- package/dist/common/xpathMapping.js +107 -0
- package/dist/helpers/downloadFile.js +125 -0
- package/dist/helpers/export.d.js +1 -0
- package/dist/helpers/export.d.ts +1294 -0
- package/dist/helpers/extractMarkdown.js +35 -0
- package/dist/helpers/filterEmptyValues.js +54 -0
- package/dist/helpers/gotoUrl.js +93 -0
- package/dist/helpers/index.d.ts +1294 -0
- package/dist/helpers/index.js +115 -0
- package/dist/helpers/processDate.js +25 -0
- package/dist/helpers/resolveUrl.js +63 -0
- package/dist/helpers/sanitizeHtml.js +73 -0
- package/dist/helpers/saveFileToS3.js +46 -0
- package/dist/helpers/scrollToLoadContent.js +50 -0
- package/dist/helpers/tests/extendedTest.js +130 -0
- package/dist/helpers/tests/testDownloadFile.spec.js +197 -0
- package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
- package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
- package/dist/helpers/tests/testIsPageLoaded.spec.js +285 -0
- package/dist/helpers/tests/testProcessDate.spec.js +13 -0
- package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
- package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
- package/dist/helpers/tests/testSimplifyHtml.spec.js +251 -0
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +380 -0
- package/dist/helpers/tests/testWaitForDomSettled.spec.js +169 -0
- package/dist/helpers/tests/testWaitForNetworkIdle.spec.js +115 -0
- package/dist/helpers/types/Attachment.js +81 -0
- package/dist/helpers/types/CustomTypeRegistry.js +48 -0
- package/dist/helpers/types/RunEnvironment.js +18 -0
- package/dist/helpers/types/ValidationError.js +17 -0
- package/dist/helpers/types/index.js +51 -0
- package/dist/helpers/uploadFileToS3.js +153 -0
- package/dist/helpers/utils/getS3Client.js +21 -0
- package/dist/helpers/utils/index.js +73 -0
- package/dist/helpers/utils/isDownload.js +10 -0
- package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
- package/dist/helpers/utils/isLocator.js +9 -0
- package/dist/helpers/utils/jwtTokenManager.js +18 -0
- package/dist/helpers/validateDataUsingSchema.js +119 -0
- package/dist/helpers/waitForDomSettled.js +182 -0
- package/dist/helpers/waitForNetworkIdle.js +191 -0
- package/dist/index.d.js +82 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.js +84 -0
- package/dist/intunedServices/ApiGateway/aiApiGateway.js +87 -0
- package/dist/intunedServices/ApiGateway/factory.js +13 -0
- package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
- package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
- package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
- package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +221 -0
- package/dist/intunedServices/ApiGateway/types.js +11 -0
- package/dist/intunedServices/cache/cache.js +61 -0
- package/dist/intunedServices/cache/index.js +12 -0
- package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
- package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
- package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +149 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +145 -0
- package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
- package/dist/optimized-extractors/common/findTableHeaders.js +175 -0
- package/dist/optimized-extractors/common/index.js +55 -0
- package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +97 -0
- package/dist/optimized-extractors/common/matching/matching.js +212 -0
- package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
- package/dist/optimized-extractors/common/matching/types.js +18 -0
- package/dist/optimized-extractors/common/matching/utils.js +184 -0
- package/dist/optimized-extractors/common/utils.js +58 -0
- package/dist/optimized-extractors/export.d.js +5 -0
- package/dist/optimized-extractors/export.d.ts +397 -0
- package/dist/optimized-extractors/extractArray.js +120 -0
- package/dist/optimized-extractors/extractObject.js +104 -0
- package/dist/optimized-extractors/index.d.ts +397 -0
- package/dist/optimized-extractors/index.js +31 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +312 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
- package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +152 -0
- package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
- package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
- package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +240 -0
- package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
- package/dist/optimized-extractors/models/anthropicModel.js +23 -0
- package/dist/optimized-extractors/models/openaiModel.js +23 -0
- package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
- package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
- package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
- package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
- package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
- package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
- package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
- package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
- package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
- package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
- package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
- package/dist/optimized-extractors/types/errors.js +42 -0
- package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
- package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
- package/dist/optimized-extractors/types/types.js +5 -0
- package/dist/optimized-extractors/validators.js +152 -0
- package/dist/vite-env.d.js +1 -0
- package/dist/vite-env.d.ts +9 -0
- package/docs.md +14 -0
- package/how-to-run-tests.md +10 -0
- package/intuned-runtime-setup.md +13 -0
- package/package.json +124 -0
- package/tsconfig.eslint.json +5 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.other = exports.invalidInput = exports.invalidExtractionResult = exports.insufficientAiCredits = exports.NoToolUsage = exports.NoDataFound = exports.AiCallFailed = void 0;
|
|
7
|
+
/**
 * Builds the error descriptor tagged "NoDataFound".
 *
 * @param {*} context - Caller-supplied detail stored unchanged on the result.
 * @returns {{type: "NoDataFound", context: *}} A new plain object on every call.
 */
const NoDataFound = context => {
  return {
    type: "NoDataFound",
    context
  };
};
|
|
11
|
+
exports.NoDataFound = NoDataFound;
|
|
12
|
+
/**
 * Builds the error descriptor tagged "NoToolUsage".
 *
 * @param {*} context - Caller-supplied detail stored unchanged on the result.
 * @returns {{type: "NoToolUsage", context: *}} A new plain object on every call.
 */
const NoToolUsage = context => {
  return {
    type: "NoToolUsage",
    context
  };
};
|
|
16
|
+
exports.NoToolUsage = NoToolUsage;
|
|
17
|
+
/**
 * Builds the error descriptor tagged "AiCallFailed".
 *
 * @param {*} context - Caller-supplied detail stored unchanged on the result.
 * @param {*} [error] - The underlying failure, if any; stored as-is (the key is
 *   present with value `undefined` when the argument is omitted).
 * @returns {{type: "AiCallFailed", context: *, error: *}} A new plain object on every call.
 */
const AiCallFailed = (context, error) => {
  return {
    type: "AiCallFailed",
    context,
    error
  };
};
|
|
22
|
+
exports.AiCallFailed = AiCallFailed;
|
|
23
|
+
/**
 * Builds the error descriptor tagged "InvalidExtractionResult".
 *
 * @param {*} context - Caller-supplied detail stored unchanged on the result.
 * @returns {{type: "InvalidExtractionResult", context: *}} A new plain object on every call.
 */
const invalidExtractionResult = context => {
  return {
    type: "InvalidExtractionResult",
    context
  };
};
|
|
27
|
+
exports.invalidExtractionResult = invalidExtractionResult;
|
|
28
|
+
/**
 * Builds the error descriptor tagged "invalidInput".
 *
 * Note the tag is lower camel case, unlike the PascalCase tags of most sibling
 * factories; it is kept as-is because consumers may match on the exact string.
 *
 * @param {*} context - Caller-supplied detail stored unchanged on the result.
 * @returns {{type: "invalidInput", context: *}} A new plain object on every call.
 */
const invalidInput = context => {
  return {
    type: "invalidInput",
    context
  };
};
|
|
32
|
+
exports.invalidInput = invalidInput;
|
|
33
|
+
/**
 * Builds the catch-all error descriptor tagged "other".
 *
 * @param {*} context - Caller-supplied detail stored unchanged on the result.
 * @returns {{type: "other", context: *}} A new plain object on every call.
 */
const other = context => {
  return {
    type: "other",
    context
  };
};
|
|
37
|
+
exports.other = other;
|
|
38
|
+
/**
 * Builds the error descriptor tagged "InsufficientAiCredits".
 *
 * @param {*} context - Caller-supplied detail stored unchanged on the result.
 * @returns {{type: "InsufficientAiCredits", context: *}} A new plain object on every call.
 */
const insufficientAiCredits = context => {
  return {
    type: "InsufficientAiCredits",
    context
  };
};
|
|
42
|
+
exports.insufficientAiCredits = insufficientAiCredits;
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.extractStructuredDataUsingClaude = extractStructuredDataUsingClaude;
|
|
7
|
+
var _AnthropicClient = require("../../AnthropicClient");
|
|
8
|
+
var _neverthrow = require("neverthrow");
|
|
9
|
+
var Errors = _interopRequireWildcard(require("./errors"));
|
|
10
|
+
var _utils = require("./utils");
|
|
11
|
+
var _getAiTrackingHeaders = require("../../../common/eventTracking/getAiTrackingHeaders");
|
|
12
|
+
var _environmentVariables = require("../../../common/environmentVariables");
|
|
13
|
+
var _runtime = require("@intuned/runtime");
|
|
14
|
+
var _Logger = require("../../../common/Logger");
|
|
15
|
+
var _aiModelsValidations = require("../../../common/aiModelsValidations");
|
|
16
|
+
var _constants = require("./constants");
|
|
17
|
+
// Transpiler-emitted interop helper used for the `require("./errors")` namespace import.
// (Looks like Babel's interopRequireWildcard helper -- not verified from here.)
//
// Behavior, as written below:
//  - On the first call it creates two WeakMap caches (when WeakMap exists), then
//    replaces itself with the inner function and invokes that.
//  - If `t` is falsy and `e` is truthy with `e.__esModule` set, `e` is returned unchanged.
//  - Otherwise a prototype-less wrapper `{ default: e }` is created. For `null` or any
//    value that is neither an object nor a function, that wrapper is returned directly.
//  - For objects/functions, a previously cached wrapper is returned when present
//    (the cache is chosen by `t`); otherwise the new wrapper is cached.
//  - Every own enumerable string key of `e` except "default" is copied onto the wrapper;
//    accessor properties are copied by descriptor so getters/setters stay live.
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
18
|
+
/**
 * Asks a Claude model to extract a structured entity from text and/or images
 * by offering it two tools: `extract_<entityName>` (whose input schema is the
 * processed JSON schema) and `no_data_found`.
 *
 * @param {object} input
 * @param {string} input.entityName - Used to build the tool name and the prompt text.
 * @param {string} input.model - Looked up in CLAUDE_MODELS_MAPPINGS; used verbatim when unmapped.
 * @param {object} input.jsonSchema - Schema of the entity; its `description` and `type` are read here.
 * @param {string} [input.systemMessage] - Sent as the system prompt (empty string when nullish).
 * @param {string[]} [input.text] - Each entry becomes one text part of the user message.
 * @param {string[]} [input.extraUserMessages] - Text parts placed before the extraction instruction.
 * @param {Array<{data: *, image_type: string}>} [input.images] - `data` must support
 *   `toString("base64")` (presumably a Buffer -- confirm against callers).
 * @param {string} [input.apiKey] - Forwarded to the Anthropic client factory; when set,
 *   cost logging reports that cost is not calculated.
 * @param {boolean} [input.logAiCallCost] - When truthy, logs the call cost (or why it is unavailable).
 * @param {*} [input.identifier] - Only interpolated into log messages.
 * @returns {Promise<object>} A neverthrow Result: `ok({ result })` on success, otherwise `err(...)`
 *   carrying one of the descriptors from `./errors` (InsufficientAiCredits, AiCallFailed,
 *   NoToolUsage, NoDataFound, InvalidExtractionResult).
 */
async function extractStructuredDataUsingClaude(input) {
  // Pulling the functions off the namespace keeps calls `this`-free, matching the
  // transpiled `(0, ns.fn)(...)` call form used throughout this package.
  const { err, ok, fromPromise } = _neverthrow;
  const {
    entityName,
    model,
    jsonSchema: originalJsonSchema,
    systemMessage,
    text,
    extraUserMessages,
    images,
    apiKey
  } = input;

  const processedJsonSchema = (0, _utils.processInputSchema)(originalJsonSchema, entityName);
  const toolName = `extract_${entityName}`;

  // User message parts, in order: extra messages, extraction instruction, text, images.
  const asTextPart = value => ({
    type: "text",
    text: value
  });
  const content = [];
  if (extraUserMessages) {
    content.push(...extraUserMessages.map(message => asTextPart(message)));
  }
  // NOTE(review): the prompt wording (including "Use must the") is runtime text sent
  // to the model and is intentionally reproduced unchanged.
  const descriptionSuffix = originalJsonSchema.description ? `${entityName} is ${originalJsonSchema.description}` : "";
  content.push(asTextPart(`extract ${entityName} from the text and images, Use must the ${toolName} tool, ${descriptionSuffix}`));
  if (text) {
    content.push(...text.map(t => asTextPart(t)));
  }
  if (images) {
    content.push(...images.map(image => ({
      type: "image",
      source: {
        data: image.data.toString("base64"),
        media_type: `image/${image.image_type}`,
        type: "base64"
      }
    })));
  }

  const anthropic = (0, _AnthropicClient.createAnthropicInstance)({
    apiKey
  });
  const modelName = _aiModelsValidations.CLAUDE_MODELS_MAPPINGS[model] ?? model;
  const maxTokens = _aiModelsValidations.MAX_TOKENS_OVERRIDES[modelName] ?? _constants.DEFAULT_CLAUDE_MODEL_MAX_TOKEN;

  const environment = (0, _environmentVariables.getEnvironmentVariable)("RUN_ENVIRONMENT");
  // Read the execution context once instead of once per header field.
  const executionContext = (0, _runtime.getExecutionContext)();
  const headers = (0, _getAiTrackingHeaders.getAiTrackingHeaders)({
    environment,
    type: "DYNAMIC_LIST",
    runId: executionContext?.runId,
    jobId: executionContext?.jobId,
    jobRunId: executionContext?.jobRunId,
    queueId: executionContext?.queueId
  });

  // Translates a rejected API call into one of the error descriptors. Every property
  // read is guarded so the mapper itself cannot throw on an unexpected error shape.
  const toExtractionError = error => {
    const status = error?.status;
    const apiError = error?.error?.error;
    if (status === 449) {
      return Errors.insufficientAiCredits(apiError);
    }
    if (status === 413) {
      return Errors.AiCallFailed("content should not exceed 10mb", error);
    }
    if (status === 400 && typeof apiError === "object" && apiError?.message?.includes("prompt is too long:")) {
      return Errors.AiCallFailed("content exceeded model max capacity", error);
    }
    return Errors.AiCallFailed(`Failed to call claude api with status ${status}: ${error?.message}`, error);
  };

  const response = await fromPromise(anthropic.messages.create({
    max_tokens: maxTokens,
    temperature: 0,
    system: `${systemMessage ?? ""}`,
    messages: [{
      role: "user",
      content
    }],
    model: modelName,
    tools: [{
      input_schema: processedJsonSchema,
      name: toolName,
      description: `Extract ${entityName} mentioned in the text or images. Relay on the parameters for more info. always use this tool when you are asked to extract data. you should respect the provided schema even if it was multiple levels deep, undefined is not an accepted input, please never respond with it.`
    }, {
      name: "no_data_found",
      description: `you should call this tool you are asked to extract data using ${toolName} and you couldn't find any data, make this your last resort, if you are sure that there is no data in the text or images.`,
      input_schema: {
        type: "object",
        properties: null
      }
    }]
  }, {
    headers
  }).withResponse(), toExtractionError);

  if (response.isErr()) {
    return err(response.error);
  }

  const {
    data: message,
    response: httpResponse
  } = response.value;
  const extractionFailedText = "the model was not able to extract data correctly, please try to modify your prompt and schema to give more context.";

  if (message.stop_reason === "max_tokens") {
    return err(Errors.AiCallFailed("response from ai exceeds model maximum output tokens, try to be more specific with what data you need to extract"));
  }
  if (message.stop_reason !== "tool_use") {
    return err(Errors.NoToolUsage(extractionFailedText));
  }

  // `no_data_found` wins over the extraction tool if the model called both.
  const isToolCall = (part, name) => part.type === "tool_use" && part.name === name;
  if (message.content.some(part => isToolCall(part, "no_data_found"))) {
    return err(Errors.NoDataFound("data isn't found in the text or images."));
  }
  const tool = message.content.find(part => isToolCall(part, toolName));
  if (!tool) {
    return err(Errors.NoToolUsage(extractionFailedText));
  }
  if (!tool.input) {
    return err(Errors.invalidExtractionResult(extractionFailedText));
  }
  // NOTE(review): this compares against the literal string "undefined" -- presumably the
  // model sometimes emits that text instead of an array; confirm this is intended and
  // that a real `undefined` value is handled by getResultFromOutputSchema.
  if (originalJsonSchema.type === "array" && tool.input[entityName] === "undefined") {
    return err(Errors.invalidExtractionResult("the model was not able to extract data correctly"));
  }

  const result = (0, _utils.getResultFromOutputSchema)(originalJsonSchema, entityName, tool.input);

  const callCost = httpResponse.headers.get("x-ai-cost-in-cents");
  if (input.logAiCallCost) {
    if (apiKey) {
      _Logger.logger.info(`extractor ${input.identifier}: AI cost is not calculated (using custom API key)`);
    } else if (callCost) {
      const cost = Number.parseFloat(callCost);
      if (!Number.isNaN(cost)) {
        // Header value is in cents; log it in dollars.
        _Logger.logger.info(`extractor ${input.identifier}: AI cost is $${cost / 100}`);
      }
    }
  }

  return ok({
    result
  });
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.extractStructuredDataUsingGoogle = extractStructuredDataUsingGoogle;
|
|
7
|
+
var _aiModelsValidations = require("../../../common/aiModelsValidations");
|
|
8
|
+
var _extractStrucutredDataUsingAiInstance = require("./extractStrucutredDataUsingAiInstance");
|
|
9
|
+
var Errors = _interopRequireWildcard(require("./errors"));
|
|
10
|
+
var _neverthrow = require("neverthrow");
|
|
11
|
+
var _google = require("@ai-sdk/google");
|
|
12
|
+
// Transpiler-emitted interop helper used for the `require("./errors")` namespace import.
// (Looks like Babel's interopRequireWildcard helper -- not verified from here.)
//
// Behavior, as written below:
//  - On the first call it creates two WeakMap caches (when WeakMap exists), then
//    replaces itself with the inner function and invokes that.
//  - If `t` is falsy and `e` is truthy with `e.__esModule` set, `e` is returned unchanged.
//  - Otherwise a prototype-less wrapper `{ default: e }` is created. For `null` or any
//    value that is neither an object nor a function, that wrapper is returned directly.
//  - For objects/functions, a previously cached wrapper is returned when present
//    (the cache is chosen by `t`); otherwise the new wrapper is cached.
//  - Every own enumerable string key of `e` except "default" is copied onto the wrapper;
//    accessor properties are copied by descriptor so getters/setters stay live.
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
13
|
+
/**
 * Runs structured-data extraction through a Google Generative AI model by
 * delegating to `extractStructuredDataUsingAiInstance`.
 *
 * A custom API key is mandatory for this provider: without `input.apiKey` the
 * function returns an `invalidInput` error instead of calling anything.
 *
 * @param {object} input - Forwarded as-is to the shared extractor, with
 *   `extraUserMessages`, `model`, `apiName` and `maxTokens` overridden here.
 * @param {string} input.apiKey - Google API key used to create the provider.
 * @param {string} input.model - Looked up in GOOGLE_MODELS_MAPPINGS; used verbatim when unmapped.
 * @param {string[]} [input.extraUserMessages] - Never mutated; a copy is forwarded.
 * @param {string[]} [input.text]
 * @param {Array} [input.images]
 * @returns {Promise<object>} `err(invalidInput(...))` when the API key is missing, otherwise
 *   whatever `extractStructuredDataUsingAiInstance` resolves to.
 */
async function extractStructuredDataUsingGoogle(input) {
  if (!input.apiKey) {
    return (0, _neverthrow.err)(Errors.invalidInput("Google AI is only supported with a custom API key. Please provide it or use a different AI provider."));
  }

  // Own-property lookup so inherited keys (e.g. "toString") are never treated as aliases.
  const modelMappings = _aiModelsValidations.GOOGLE_MODELS_MAPPINGS;
  const model = Object.prototype.hasOwnProperty.call(modelMappings, input.model) ? modelMappings[input.model] : input.model;

  const googleGenAi = (0, _google.createGoogleGenerativeAI)({
    apiKey: input.apiKey
  });

  // Work on a copy: the fallback message below must not leak into the caller's array.
  const extraMessages = [...(input.extraUserMessages ?? [])];
  const textCount = input.text?.length ?? 0;
  if (extraMessages.length === 0 && textCount === 0) {
    // With no text at all, add a minimal instruction pointing at the image(s).
    const imageCount = input.images?.length ?? 0;
    extraMessages.push("Use the following image" + (imageCount > 1 ? "s" : ""));
  }

  return (0, _extractStrucutredDataUsingAiInstance.extractStructuredDataUsingAiInstance)({
    ...input,
    extraUserMessages: extraMessages,
    model: googleGenAi(model),
    apiName: "google",
    maxTokens: _aiModelsValidations.MAX_TOKENS_OVERRIDES[model]
  });
}
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.extractStructuredDataUsingOpenAi = extractStructuredDataUsingOpenAi;
|
|
7
|
+
var _neverthrow = require("neverthrow");
|
|
8
|
+
var Errors = _interopRequireWildcard(require("./errors"));
|
|
9
|
+
var _utils = require("./utils");
|
|
10
|
+
var _getAiTrackingHeaders = require("../../../common/eventTracking/getAiTrackingHeaders");
|
|
11
|
+
var _environmentVariables = require("../../../common/environmentVariables");
|
|
12
|
+
var _runtime = require("@intuned/runtime");
|
|
13
|
+
var _Logger = require("../../../common/Logger");
|
|
14
|
+
var _aiModelsValidations = require("../../../common/aiModelsValidations");
|
|
15
|
+
var _openAiClients = require("../../openAiClients");
|
|
16
|
+
// Transpiler-emitted interop helper used for the `require("./errors")` namespace import.
// (Looks like Babel's interopRequireWildcard helper -- not verified from here.)
//
// Behavior, as written below:
//  - On the first call it creates two WeakMap caches (when WeakMap exists), then
//    replaces itself with the inner function and invokes that.
//  - If `t` is falsy and `e` is truthy with `e.__esModule` set, `e` is returned unchanged.
//  - Otherwise a prototype-less wrapper `{ default: e }` is created. For `null` or any
//    value that is neither an object nor a function, that wrapper is returned directly.
//  - For objects/functions, a previously cached wrapper is returned when present
//    (the cache is chosen by `t`); otherwise the new wrapper is cached.
//  - Every own enumerable string key of `e` except "default" is copied onto the wrapper;
//    accessor properties are copied by descriptor so getters/setters stay live.
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
17
|
+
async function extractStructuredDataUsingOpenAi(input) {
|
|
18
|
+
var _getExecutionContext, _getExecutionContext2, _getExecutionContext3, _getExecutionContext4, _completion$value$dat, _completion$value$dat2;
|
|
19
|
+
const {
|
|
20
|
+
entityName,
|
|
21
|
+
model,
|
|
22
|
+
jsonSchema: originalJsonSchema,
|
|
23
|
+
systemMessage,
|
|
24
|
+
text,
|
|
25
|
+
extraUserMessages,
|
|
26
|
+
images,
|
|
27
|
+
apiKey
|
|
28
|
+
} = input;
|
|
29
|
+
if (images && images.length > 0 && model === "gpt3.5-turbo") {
|
|
30
|
+
return (0, _neverthrow.err)(Errors.invalidInput("Images are only supported for gpt4-turbo and gpt-4o models"));
|
|
31
|
+
}
|
|
32
|
+
const processedJsonSchema = (0, _utils.processInputSchema)(originalJsonSchema, entityName);
|
|
33
|
+
const content = [];
|
|
34
|
+
if (extraUserMessages) {
|
|
35
|
+
content.push(...extraUserMessages.map(message => ({
|
|
36
|
+
type: "text",
|
|
37
|
+
text: message
|
|
38
|
+
})));
|
|
39
|
+
}
|
|
40
|
+
if (text) {
|
|
41
|
+
content.push(...text.map(t => ({
|
|
42
|
+
type: "text",
|
|
43
|
+
text: t
|
|
44
|
+
})));
|
|
45
|
+
}
|
|
46
|
+
if (images) {
|
|
47
|
+
const imageContent = images.map(image => ({
|
|
48
|
+
type: "image_url",
|
|
49
|
+
image_url: {
|
|
50
|
+
url: `data:image/${image.image_type};base64,${image.data.toString("base64")}`,
|
|
51
|
+
detail: "high"
|
|
52
|
+
}
|
|
53
|
+
}));
|
|
54
|
+
content.push(...imageContent);
|
|
55
|
+
}
|
|
56
|
+
const modelName = _aiModelsValidations.GPT_MODELS_MAPPINGS[input.model] ?? input.model;
|
|
57
|
+
const toolName = `extract_${entityName}`;
|
|
58
|
+
const openAiInstance = (0, _openAiClients.createOpenAIInstance)({
|
|
59
|
+
apiKey
|
|
60
|
+
});
|
|
61
|
+
const completion = await (0, _neverthrow.fromPromise)(openAiInstance.chat.completions.create({
|
|
62
|
+
max_tokens: 4000,
|
|
63
|
+
temperature: 0,
|
|
64
|
+
model: modelName,
|
|
65
|
+
messages: [{
|
|
66
|
+
role: "system",
|
|
67
|
+
content: `${systemMessage ?? ""}, ` + `using the ` + (originalJsonSchema.description ? `. Here is more info about the entity that we are trying to extract: ` + originalJsonSchema.description : "")
|
|
68
|
+
}, {
|
|
69
|
+
role: "user",
|
|
70
|
+
content
|
|
71
|
+
}],
|
|
72
|
+
function_call: {
|
|
73
|
+
name: toolName
|
|
74
|
+
},
|
|
75
|
+
functions: [{
|
|
76
|
+
name: "no_data_found",
|
|
77
|
+
description: `you should call this tool you are asked to extract data using ` + toolName + ` and you couldn't find any data, make this your last resort, if you are sure that there is no data in the text or images`
|
|
78
|
+
}, {
|
|
79
|
+
name: toolName,
|
|
80
|
+
description: `Extract ` + entityName + ` mentioned in the text and images. Relay on the parameters for more info.`,
|
|
81
|
+
parameters: processedJsonSchema
|
|
82
|
+
}]
|
|
83
|
+
}, {
|
|
84
|
+
headers: (0, _getAiTrackingHeaders.getAiTrackingHeaders)({
|
|
85
|
+
environment: (0, _environmentVariables.getEnvironmentVariable)("RUN_ENVIRONMENT"),
|
|
86
|
+
type: "DYNAMIC_LIST",
|
|
87
|
+
runId: (_getExecutionContext = (0, _runtime.getExecutionContext)()) === null || _getExecutionContext === void 0 ? void 0 : _getExecutionContext.runId,
|
|
88
|
+
jobId: (_getExecutionContext2 = (0, _runtime.getExecutionContext)()) === null || _getExecutionContext2 === void 0 ? void 0 : _getExecutionContext2.jobId,
|
|
89
|
+
jobRunId: (_getExecutionContext3 = (0, _runtime.getExecutionContext)()) === null || _getExecutionContext3 === void 0 ? void 0 : _getExecutionContext3.jobRunId,
|
|
90
|
+
queueId: (_getExecutionContext4 = (0, _runtime.getExecutionContext)()) === null || _getExecutionContext4 === void 0 ? void 0 : _getExecutionContext4.queueId
|
|
91
|
+
})
|
|
92
|
+
}).withResponse(), error => {
|
|
93
|
+
const typedError = error;
|
|
94
|
+
if ((typedError === null || typedError === void 0 ? void 0 : typedError.status) === 449) {
|
|
95
|
+
var _typedError$error;
|
|
96
|
+
return Errors.insufficientAiCredits(typedError === null || typedError === void 0 || (_typedError$error = typedError.error) === null || _typedError$error === void 0 ? void 0 : _typedError$error.error);
|
|
97
|
+
}
|
|
98
|
+
if ((error === null || error === void 0 ? void 0 : error.status) === 400 && ["context_length_exceeded", "string_above_max_length"].includes(error === null || error === void 0 ? void 0 : error.code)) {
|
|
99
|
+
return Errors.AiCallFailed("content exceeded model max capacity", error);
|
|
100
|
+
}
|
|
101
|
+
return Errors.AiCallFailed(`Failed to call openai api with status ${typedError === null || typedError === void 0 ? void 0 : typedError.status}: ${error.message}`, error);
|
|
102
|
+
});
|
|
103
|
+
if (completion.isErr()) {
|
|
104
|
+
return (0, _neverthrow.err)(completion.error);
|
|
105
|
+
}
|
|
106
|
+
if (completion.value.data.choices[0].finish_reason === "length") {
|
|
107
|
+
return (0, _neverthrow.err)(Errors.AiCallFailed("response from ai exceeds model maximum output tokens, try to be more specific with what data you need to extract"));
|
|
108
|
+
}
|
|
109
|
+
const noDataFound = (_completion$value$dat = completion.value.data.choices[0].message.tool_calls) === null || _completion$value$dat === void 0 ? void 0 : _completion$value$dat.some(content => content.type === "function" && content.function.name == "no_data_found");
|
|
110
|
+
if (noDataFound) {
|
|
111
|
+
return (0, _neverthrow.err)(Errors.NoDataFound("data isn't found in the text or images."));
|
|
112
|
+
}
|
|
113
|
+
let functionCall = (_completion$value$dat2 = completion.value.data.choices[0].message.tool_calls) === null || _completion$value$dat2 === void 0 || (_completion$value$dat2 = _completion$value$dat2.find(t => t.type === "function" && t.function.name === toolName)) === null || _completion$value$dat2 === void 0 ? void 0 : _completion$value$dat2.function;
|
|
114
|
+
if (!functionCall) {
|
|
115
|
+
functionCall = completion.value.data.choices[0].message.function_call;
|
|
116
|
+
if (!functionCall) {
|
|
117
|
+
return (0, _neverthrow.err)(Errors.NoToolUsage("the model was not able to extract data correctly."));
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
const extractedData = functionCall.arguments;
|
|
121
|
+
if (!extractedData) return (0, _neverthrow.err)(Errors.invalidExtractionResult("No extraction result found."));
|
|
122
|
+
const escapedString = extractedData.replace(/[\t\n\r\f\v]/g, " ");
|
|
123
|
+
const jsonParse = (0, _neverthrow.fromThrowable)(JSON.parse, () => Errors.invalidExtractionResult("Failed to parse extraction result."));
|
|
124
|
+
const parsedData = jsonParse(escapedString);
|
|
125
|
+
if (parsedData.isErr()) {
|
|
126
|
+
return (0, _neverthrow.err)(parsedData.error);
|
|
127
|
+
}
|
|
128
|
+
const result = (0, _utils.getResultFromOutputSchema)(originalJsonSchema, entityName, parsedData.value);
|
|
129
|
+
const formatted = (0, _utils.cleanupAiResult)(result);
|
|
130
|
+
const callCost = completion.value.response.headers.get("x-ai-cost-in-cents");
|
|
131
|
+
if (input.logAiCallCost) {
|
|
132
|
+
if (apiKey) {
|
|
133
|
+
_Logger.logger.info(`extractor ${input.identifier}: AI cost is not calculated (using custom API key)`);
|
|
134
|
+
} else if (callCost) {
|
|
135
|
+
const cost = parseFloat(callCost);
|
|
136
|
+
if (!isNaN(cost)) {
|
|
137
|
+
_Logger.logger.info(`extractor ${input.identifier}: AI cost is $${cost / 100}`);
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
return (0, _neverthrow.ok)({
|
|
142
|
+
result: formatted
|
|
143
|
+
});
|
|
144
|
+
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.extractStructuredDataUsingAiInstance = extractStructuredDataUsingAiInstance;
|
|
7
|
+
var _neverthrow = require("neverthrow");
|
|
8
|
+
var Errors = _interopRequireWildcard(require("./errors"));
|
|
9
|
+
var _utils = require("./utils");
|
|
10
|
+
var _Logger = require("../../../common/Logger");
|
|
11
|
+
var _ai = require("ai");
|
|
12
|
+
// Module-interop helper (looks compiler-generated; kept byte-for-byte).
// - First call: creates two WeakMap caches (when WeakMap exists) and replaces
//   itself with an inner function that closes over them.
// - If `t` is falsy and `e.__esModule` is truthy, `e` is returned unchanged.
// - Otherwise a null-prototype namespace object `{ default: e }` is built; for
//   null or non-object/non-function `e` that wrapper is returned as-is.
// - For objects/functions the wrapper is cached per `e` (cache chosen by `t`),
//   and every own enumerable property except "default" is copied onto it,
//   re-defining accessors via their descriptors so getters/setters survive.
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
13
|
+
/**
 * Produces a printable description for anything a promise may reject with.
 * Rejections are not guaranteed to be Error instances (they can be null,
 * undefined or a primitive), so `.message` must not be read unguarded.
 *
 * @param {unknown} error - The rejection value.
 * @returns {*} `error.message` when present, otherwise a string form of `error`.
 */
function describeAiError(error) {
  if (error?.message !== undefined) {
    return error.message;
  }
  try {
    return String(error);
  } catch {
    // String() itself throws for exotic values such as null-prototype objects.
    return "unknown error";
  }
}

/**
 * Maps a rejection from `generateText` to one of this module's error values.
 * Never throws, so the surrounding `fromPromise` always yields an Err.
 *
 * @param {unknown} error - The rejection value.
 * @param {string} apiName - Provider label used in the error message.
 * @returns {*} `Errors.insufficientAiCredits(...)` for API errors with status
 *   449, otherwise `Errors.AiCallFailed(...)`.
 */
function toAiCallError(error, apiName) {
  const message = describeAiError(error);
  // `isInstance` is used instead of `instanceof` so API errors are still
  // recognised when they were created by another copy of the SDK classes.
  if (!_ai.APICallError.isInstance(error)) {
    return Errors.AiCallFailed(`Failed to call ${apiName} api: ${message}`, error);
  }
  // The response body is parsed leniently: anything unparsable becomes null.
  const parsedBody = (0, _neverthrow.fromThrowable)(JSON.parse, () => null)(error.responseBody ?? "");
  const responseBody = parsedBody.isOk() ? parsedBody.value : null;
  if (error.statusCode === 449) {
    return Errors.insufficientAiCredits(responseBody?.error);
  }
  return Errors.AiCallFailed(`Failed to call ${apiName} api with status ${error.statusCode}: ${message}`, error);
}

/**
 * Builds the `content` array of the user message: extra user messages first,
 * then text chunks, then images. Absent inputs contribute nothing.
 *
 * @param {object} input - Object carrying optional `extraUserMessages`, `text`
 *   and `images` arrays (images expose `data` and `image_type`).
 * @returns {Array<object>} Content parts in the order described above.
 */
function buildUserContent({ extraUserMessages, text, images }) {
  const parts = [];
  if (extraUserMessages) {
    parts.push(...extraUserMessages.map(message => ({ type: "text", text: message })));
  }
  if (text) {
    parts.push(...text.map(chunk => ({ type: "text", text: chunk })));
  }
  if (images) {
    parts.push(...images.map(image => ({
      type: "image",
      image: image.data,
      mimeType: image.image_type
    })));
  }
  return parts;
}

/**
 * Logs the cost of an AI call. When a custom API key is in use the cost is
 * reported as not calculated; otherwise the header value (in cents) is logged
 * in dollars, provided it parses as a number.
 *
 * @param {*} identifier - Extractor identifier echoed in the log line.
 * @param {*} apiKey - Custom API key, if any.
 * @param {string|undefined} callCost - Raw `x-ai-cost-in-cents` header value.
 */
function reportAiCallCost(identifier, apiKey, callCost) {
  if (apiKey) {
    _Logger.logger.info(`extractor ${identifier}: AI cost is not calculated (using custom API key)`);
    return;
  }
  if (!callCost) {
    return;
  }
  const cost = parseFloat(callCost);
  if (!Number.isNaN(cost)) {
    _Logger.logger.info(`extractor ${identifier}: AI cost is $${cost / 100}`);
  }
}

/**
 * Extracts structured data with an AI SDK model instance by forcing the model
 * to call either an `extract_<entityName>` tool or the `no_data_found` tool.
 *
 * @param {object} input - Reads `entityName`, `model`, `jsonSchema`,
 *   `systemMessage`, `text`, `extraUserMessages`, `images`, `apiKey`,
 *   `apiName`, `maxTokens`, `logAiCallCost` and `identifier`.
 * @returns {Promise<*>} A neverthrow Result: `ok({ result })` on success, or
 *   `err(...)` when the call fails, the output is cut off by the token limit,
 *   the model reports no data, or no usable tool call is returned.
 */
async function extractStructuredDataUsingAiInstance(input) {
  const {
    entityName,
    model,
    jsonSchema: originalJsonSchema,
    systemMessage,
    apiKey,
    apiName,
    maxTokens
  } = input;
  const processedJsonSchema = (0, _utils.processInputSchema)(originalJsonSchema, entityName);
  const content = buildUserContent(input);
  const toolName = `extract_${entityName}`;
  const entityInfo = originalJsonSchema.description ? `. Here is more info about the entity that we are trying to extract: ${originalJsonSchema.description}` : "";
  const apiResult = await (0, _neverthrow.fromPromise)((0, _ai.generateText)({
    model,
    maxOutputTokens: maxTokens ?? 4000,
    temperature: 0,
    messages: [{
      role: "system",
      // NOTE(review): the prompt reads "..., using the . Here is more info ..." -
      // a word seems to be missing after "using the". Left byte-identical on
      // purpose so model behaviour does not change; confirm the intended text.
      content: `${systemMessage ?? ""}, using the ${entityInfo}`
    }, {
      role: "user",
      content
    }],
    toolChoice: "required",
    tools: {
      [toolName]: (0, _ai.tool)({
        description: `Extract ${entityName} mentioned in the text and images. Rely on the parameters for more info.`,
        inputSchema: (0, _ai.jsonSchema)(processedJsonSchema),
        type: "function"
      }),
      no_data_found: {
        description: `you should call this tool you are asked to extract data using ${toolName} and you couldn't find any data, make this your last resort, if you are sure that there is no data in the text or images`,
        type: "function",
        inputSchema: (0, _ai.jsonSchema)({
          type: "object",
          properties: {}
        })
      }
    }
  }), error => toAiCallError(error, apiName));
  if (apiResult.isErr()) {
    return (0, _neverthrow.err)(apiResult.error);
  }
  const { finishReason, toolCalls, response } = apiResult.value;
  if (finishReason === "length") {
    return (0, _neverthrow.err)(Errors.AiCallFailed("response from ai exceeds model maximum output tokens, try to be more specific with what data you need to extract"));
  }
  // An explicit "no data" answer wins over any extraction call in the same reply.
  if (toolCalls?.some(call => call.toolName === "no_data_found")) {
    return (0, _neverthrow.err)(Errors.NoDataFound("data isn't found in the text or images."));
  }
  const extractionCall = toolCalls?.find(call => call.toolName === toolName);
  if (!extractionCall) {
    return (0, _neverthrow.err)(Errors.NoToolUsage("the model was not able to extract data correctly."));
  }
  const extractedData = extractionCall.input;
  if (!extractedData) {
    return (0, _neverthrow.err)(Errors.invalidExtractionResult("No extraction result found."));
  }
  const result = (0, _utils.getResultFromOutputSchema)(originalJsonSchema, entityName, extractedData);
  const formatted = (0, _utils.cleanupAiResult)(result);
  if (input.logAiCallCost) {
    // Read the header only when it is needed, and tolerate a missing response.
    reportAiCallCost(input.identifier, apiKey, response?.headers?.["x-ai-cost-in-cents"]);
  }
  return (0, _neverthrow.ok)({
    result: formatted
  });
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.extractStructuredDataUsingAi = extractStructuredDataUsingAi;
|
|
7
|
+
exports.isClaudeModel = isClaudeModel;
|
|
8
|
+
exports.isGoogleModel = isGoogleModel;
|
|
9
|
+
exports.isOpenAiModel = isOpenAiModel;
|
|
10
|
+
var _neverthrow = require("neverthrow");
|
|
11
|
+
var _extractStructuredDataUsingClaude = require("./extractStructuredDataUsingClaude");
|
|
12
|
+
var _extractStructuredDataUsingOpenAi = require("./extractStructuredDataUsingOpenAi");
|
|
13
|
+
var _utils = require("./utils");
|
|
14
|
+
var _aiModelsValidations = require("../../../common/aiModelsValidations");
|
|
15
|
+
var _extractStructuredDataUsingGoogle = require("./extractStructuredDataUsingGoogle");
|
|
16
|
+
/**
 * Reports whether `model` is listed in `SUPPORTED_CLAUDE_MODELS`.
 *
 * @param {*} model - Model identifier to look up.
 * @returns {boolean} True when the list includes `model`.
 */
function isClaudeModel(model) {
  const { SUPPORTED_CLAUDE_MODELS: claudeModels } = _aiModelsValidations;
  return claudeModels.includes(model);
}
|
|
19
|
+
/**
 * Reports whether `model` is listed in `SUPPORTED_GOOGLE_MODELS`.
 *
 * @param {*} model - Model identifier to look up.
 * @returns {boolean} True when the list includes `model`.
 */
function isGoogleModel(model) {
  const { SUPPORTED_GOOGLE_MODELS: googleModels } = _aiModelsValidations;
  return googleModels.includes(model);
}
|
|
22
|
+
/**
 * Reports whether `model` is listed in `SUPPORTED_GPT_MODELS`.
 *
 * @param {*} model - Model identifier to look up.
 * @returns {boolean} True when the list includes `model`.
 */
function isOpenAiModel(model) {
  const { SUPPORTED_GPT_MODELS: gptModels } = _aiModelsValidations;
  return gptModels.includes(model);
}
|
|
25
|
+
/**
 * Routes an extraction request to the provider-specific extractor chosen by
 * `input.model`, then runs `cleanupAiResult` on the extracted result.
 *
 * Models in the OpenAI list go to the OpenAI extractor, models in the Google
 * list go to the Google extractor, and every other model is handed to the
 * Claude extractor.
 *
 * @param {object} input - Extraction request; must carry `model`.
 * @returns {Promise<*>} A neverthrow Result: the extractor's error passed
 *   through as `err(...)`, or `ok({ result })` with the cleaned-up result.
 */
async function extractStructuredDataUsingAi(input) {
  const { model } = input;

  // Pick the extractor first so there is a single call site below.
  let runExtractor;
  if (isOpenAiModel(model)) {
    runExtractor = _extractStructuredDataUsingOpenAi.extractStructuredDataUsingOpenAi;
  } else if (isGoogleModel(model)) {
    runExtractor = _extractStructuredDataUsingGoogle.extractStructuredDataUsingGoogle;
  } else {
    runExtractor = _extractStructuredDataUsingClaude.extractStructuredDataUsingClaude;
  }

  const outcome = await runExtractor({
    ...input,
    model
  });
  if (outcome.isErr()) {
    return (0, _neverthrow.err)(outcome.error);
  }

  const cleaned = (0, _utils.cleanupAiResult)(outcome.value.result);
  return (0, _neverthrow.ok)({
    result: cleaned
  });
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.isTableHeaderOrFooter = isTableHeaderOrFooter;
|
|
7
|
+
var _AnthropicClient = require("../../AnthropicClient");
|
|
8
|
+
var _zod = require("zod");
|
|
9
|
+
var _neverthrow = require("neverthrow");
|
|
10
|
+
var Errors = _interopRequireWildcard(require("./errors"));
|
|
11
|
+
var _getAiTrackingHeaders = require("../../../common/eventTracking/getAiTrackingHeaders");
|
|
12
|
+
var _environmentVariables = require("../../../common/environmentVariables");
|
|
13
|
+
var _runtime = require("@intuned/runtime");
|
|
14
|
+
// Module-interop helper (looks compiler-generated; kept byte-for-byte).
// - First call: creates two WeakMap caches (when WeakMap exists) and replaces
//   itself with an inner function that closes over them.
// - If `t` is falsy and `e.__esModule` is truthy, `e` is returned unchanged.
// - Otherwise a null-prototype namespace object `{ default: e }` is built; for
//   null or non-object/non-function `e` that wrapper is returned as-is.
// - For objects/functions the wrapper is cached per `e` (cache chosen by `t`),
//   and every own enumerable property except "default" is copied onto it,
//   re-defining accessors via their descriptors so getters/setters survive.
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
15
|
+
/**
 * Asks a Claude model whether `content` is a table header or footer.
 *
 * The check is best-effort: empty content, a failed API call, a reply without
 * a tool call, or a tool input of the wrong shape all resolve to
 * `ok({ isHeader: false })`. The only failure surfaced to the caller is an
 * API error with status 449, reported as insufficient AI credits.
 *
 * @param {*} content - A string is sent as text; any other truthy value is
 *   sent as a base64 PNG image via `content.toString("base64")`.
 * @param {*} identifier - Not used by this function; kept for callers.
 * @returns {Promise<*>} A neverthrow Result: `ok({ isHeader })` or
 *   `err(<insufficient credits error>)`.
 */
async function isTableHeaderOrFooter(content, identifier) {
  const notHeaderOrFooter = () => (0, _neverthrow.ok)({
    isHeader: false
  });
  if (!content) {
    return notHeaderOrFooter();
  }
  const anthropic = (0, _AnthropicClient.createAnthropicInstance)();
  const itemContent = typeof content === "string" ? {
    type: "text",
    text: content
  } : {
    type: "image",
    source: {
      data: content.toString("base64"),
      media_type: "image/png",
      type: "base64"
    }
  };
  const executionContext = (0, _runtime.getExecutionContext)();
  const response = await (0, _neverthrow.fromPromise)(anthropic.messages.create({
    max_tokens: 4096,
    temperature: 0,
    messages: [{
      role: "user",
      content: [{
        type: "text",
        // The continuation line is deliberately unindented: it is part of the
        // prompt string, and leading spaces would change the text that is sent.
        text: `given the following content, you need to decide if the content is a html table header or a table footer,
a table header is a row that contains labels for table columns, and footer usually has pagination information or summary of the table`
      }, itemContent]
    }],
    model: "claude-3-haiku-20240307",
    tools: [{
      input_schema: {
        type: "object",
        properties: {
          isTableHeaderOrFooter: {
            type: "boolean"
          }
        },
        required: ["isTableHeaderOrFooter"]
      },
      name: "is_table_header_or_footer",
      description: `given a text or image content, decide if the content is a table header or footer or not.`
    }]
  }, {
    headers: (0, _getAiTrackingHeaders.getAiTrackingHeaders)({
      environment: (0, _environmentVariables.getEnvironmentVariable)("RUN_ENVIRONMENT"),
      type: "DYNAMIC_LIST",
      runId: executionContext?.runId,
      jobId: executionContext?.jobId,
      jobRunId: executionContext?.jobRunId,
      queueId: executionContext?.queueId
    })
  }), error => {
    // Null-safe on purpose: the rejection may be nullish or lack an error body,
    // and a throw in this mapper would escape as a rejection instead of an Err.
    if (error?.status === 449) {
      return Errors.insufficientAiCredits(`🔴 ${error.error?.error ?? error.message}`);
    }
    // Every other failure is intentionally left undescribed; it is turned into
    // the "not a header" answer below.
    return undefined;
  });
  if (response.isErr()) {
    if (response.error?.type === "InsufficientAiCredits") {
      return (0, _neverthrow.err)(response.error);
    }
    return notHeaderOrFooter();
  }
  const toolUse = response.value.content.find(part => part.type === "tool_use");
  if (!toolUse) {
    return notHeaderOrFooter();
  }
  const parsedInput = _zod.z.object({
    isTableHeaderOrFooter: _zod.z.boolean()
  }).safeParse(toolUse.input);
  if (!parsedInput.success) {
    return notHeaderOrFooter();
  }
  return (0, _neverthrow.ok)({
    isHeader: parsedInput.data.isTableHeaderOrFooter
  });
}
|