@intuned/browser-dev 2.2.3-unify-sdks.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc +21 -0
- package/.eslintignore +10 -0
- package/.eslintrc.js +39 -0
- package/LICENSE +43 -0
- package/dist/ai-extractors/AnthropicClient/index.js +23 -0
- package/dist/ai-extractors/export.d.js +5 -0
- package/dist/ai-extractors/export.d.ts +422 -0
- package/dist/ai-extractors/extractStructuredData.js +79 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/constants.js +7 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/errors.js +42 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingClaude.js +149 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingGoogle.js +37 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingOpenAi.js +144 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStrucutredDataUsingAiInstance.js +123 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/index.js +55 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/isItemTableHeaderOrFooter.js +96 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/screenshotHelpers.js +55 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/types.js +5 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/utils.js +53 -0
- package/dist/ai-extractors/extractionHelpers/types.js +5 -0
- package/dist/ai-extractors/fileExtractors.js +176 -0
- package/dist/ai-extractors/index.js +31 -0
- package/dist/ai-extractors/jsonSchema.d.js +5 -0
- package/dist/ai-extractors/jsonSchema.d.ts +49 -0
- package/dist/ai-extractors/openAiClients/index.js +23 -0
- package/dist/ai-extractors/validators.js +239 -0
- package/dist/browser/ai/export.d.js +3 -0
- package/dist/browser/ai/export.d.ts +587 -0
- package/dist/browser/ai/extractMarkdown.js +15 -0
- package/dist/browser/ai/extractStructuredData.js +231 -0
- package/dist/browser/ai/extractStructuredDataUsingAi.js +140 -0
- package/dist/browser/ai/extractionHelpers/screenshotHelpers.js +55 -0
- package/dist/browser/ai/extractionHelpers/validateSchema.js +148 -0
- package/dist/browser/ai/index.d.ts +587 -0
- package/dist/browser/ai/index.js +19 -0
- package/dist/browser/ai/isPageLoaded.js +67 -0
- package/dist/browser/ai/prompt.js +39 -0
- package/dist/browser/ai/tests/testCheckAllTypesAreStrings.spec.js +143 -0
- package/dist/browser/ai/tests/testExtractStructuredData.spec.js +622 -0
- package/dist/browser/ai/tools/index.js +48 -0
- package/dist/browser/ai/types/errors.js +67 -0
- package/dist/browser/ai/types/models.js +45 -0
- package/dist/browser/ai/types/types.js +48 -0
- package/dist/browser/ai/validators.js +136 -0
- package/dist/common/Logger/index.js +60 -0
- package/dist/common/Logger/types.js +5 -0
- package/dist/common/SdkError.js +50 -0
- package/dist/common/aiModelsValidations.js +50 -0
- package/dist/common/browser_scripts.js +2596 -0
- package/dist/common/ensureBrowserScripts.js +17 -0
- package/dist/common/environmentVariables.js +16 -0
- package/dist/common/eventTracking/getAiTrackingHeaders.js +31 -0
- package/dist/common/eventTracking/getFileTrackingHeaders.js +23 -0
- package/dist/common/extendedTest.js +148 -0
- package/dist/common/extractionHelpers.js +19 -0
- package/dist/common/formatZodError.js +18 -0
- package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
- package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
- package/dist/common/fuzzySearch/utils.js +23 -0
- package/dist/common/getModelProvider.js +18 -0
- package/dist/common/getSimplifiedHtml.js +122 -0
- package/dist/common/hashObject.js +32 -0
- package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
- package/dist/common/html2markdown/index.js +19 -0
- package/dist/common/jwtTokenManager.js +18 -0
- package/dist/common/loadRuntime.js +16 -0
- package/dist/common/locatorHelpers.js +41 -0
- package/dist/common/matching/collectStrings.js +32 -0
- package/dist/common/matching/levenshtein.js +40 -0
- package/dist/common/matching/matching.js +317 -0
- package/dist/common/matching/types.js +1 -0
- package/dist/common/noEmpty.js +9 -0
- package/dist/common/saveSnapshotWithExamples.js +60 -0
- package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
- package/dist/common/xpathMapping.js +107 -0
- package/dist/helpers/downloadFile.js +125 -0
- package/dist/helpers/export.d.js +1 -0
- package/dist/helpers/export.d.ts +1294 -0
- package/dist/helpers/extractMarkdown.js +35 -0
- package/dist/helpers/filterEmptyValues.js +54 -0
- package/dist/helpers/gotoUrl.js +93 -0
- package/dist/helpers/index.d.ts +1294 -0
- package/dist/helpers/index.js +115 -0
- package/dist/helpers/processDate.js +25 -0
- package/dist/helpers/resolveUrl.js +63 -0
- package/dist/helpers/sanitizeHtml.js +73 -0
- package/dist/helpers/saveFileToS3.js +46 -0
- package/dist/helpers/scrollToLoadContent.js +50 -0
- package/dist/helpers/tests/extendedTest.js +130 -0
- package/dist/helpers/tests/testDownloadFile.spec.js +197 -0
- package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
- package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
- package/dist/helpers/tests/testIsPageLoaded.spec.js +285 -0
- package/dist/helpers/tests/testProcessDate.spec.js +13 -0
- package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
- package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
- package/dist/helpers/tests/testSimplifyHtml.spec.js +251 -0
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +380 -0
- package/dist/helpers/tests/testWaitForDomSettled.spec.js +169 -0
- package/dist/helpers/tests/testWaitForNetworkIdle.spec.js +115 -0
- package/dist/helpers/types/Attachment.js +81 -0
- package/dist/helpers/types/CustomTypeRegistry.js +48 -0
- package/dist/helpers/types/RunEnvironment.js +18 -0
- package/dist/helpers/types/ValidationError.js +17 -0
- package/dist/helpers/types/index.js +51 -0
- package/dist/helpers/uploadFileToS3.js +153 -0
- package/dist/helpers/utils/getS3Client.js +21 -0
- package/dist/helpers/utils/index.js +73 -0
- package/dist/helpers/utils/isDownload.js +10 -0
- package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
- package/dist/helpers/utils/isLocator.js +9 -0
- package/dist/helpers/utils/jwtTokenManager.js +18 -0
- package/dist/helpers/validateDataUsingSchema.js +119 -0
- package/dist/helpers/waitForDomSettled.js +182 -0
- package/dist/helpers/waitForNetworkIdle.js +191 -0
- package/dist/index.d.js +82 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.js +84 -0
- package/dist/intunedServices/ApiGateway/aiApiGateway.js +87 -0
- package/dist/intunedServices/ApiGateway/factory.js +13 -0
- package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
- package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
- package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
- package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +221 -0
- package/dist/intunedServices/ApiGateway/types.js +11 -0
- package/dist/intunedServices/cache/cache.js +61 -0
- package/dist/intunedServices/cache/index.js +12 -0
- package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
- package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
- package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +149 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +145 -0
- package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
- package/dist/optimized-extractors/common/findTableHeaders.js +175 -0
- package/dist/optimized-extractors/common/index.js +55 -0
- package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +97 -0
- package/dist/optimized-extractors/common/matching/matching.js +212 -0
- package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
- package/dist/optimized-extractors/common/matching/types.js +18 -0
- package/dist/optimized-extractors/common/matching/utils.js +184 -0
- package/dist/optimized-extractors/common/utils.js +58 -0
- package/dist/optimized-extractors/export.d.js +5 -0
- package/dist/optimized-extractors/export.d.ts +397 -0
- package/dist/optimized-extractors/extractArray.js +120 -0
- package/dist/optimized-extractors/extractObject.js +104 -0
- package/dist/optimized-extractors/index.d.ts +397 -0
- package/dist/optimized-extractors/index.js +31 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +312 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
- package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +152 -0
- package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
- package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
- package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +240 -0
- package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
- package/dist/optimized-extractors/models/anthropicModel.js +23 -0
- package/dist/optimized-extractors/models/openaiModel.js +23 -0
- package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
- package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
- package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
- package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
- package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
- package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
- package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
- package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
- package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
- package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
- package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
- package/dist/optimized-extractors/types/errors.js +42 -0
- package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
- package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
- package/dist/optimized-extractors/types/types.js +5 -0
- package/dist/optimized-extractors/validators.js +152 -0
- package/dist/vite-env.d.js +1 -0
- package/dist/vite-env.d.ts +9 -0
- package/docs.md +14 -0
- package/how-to-run-tests.md +10 -0
- package/intuned-runtime-setup.md +13 -0
- package/package.json +124 -0
- package/tsconfig.eslint.json +5 -0
- package/tsconfig.json +26 -0
package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/screenshotHelpers.js
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.buildImagesFromPageOrHandle = buildImagesFromPageOrHandle;
|
|
7
|
+
exports.captureFullPageImagesWithOverlap = captureFullPageImagesWithOverlap;
|
|
8
|
+
var _neverthrow = require("neverthrow");
|
|
9
|
+
var errors = _interopRequireWildcard(require("./errors"));
|
|
10
|
+
// Babel interop helper: wraps a required module `e` in a namespace-like object.
// - When `t` is falsy and `e` is already flagged `__esModule`, `e` is returned untouched.
// - Otherwise a null-prototype object `f` is built with `default: e`; for non-object,
//   non-function values (and null) that bare wrapper is returned immediately.
// - Own properties of `e` other than "default" are copied onto `f`; properties that have
//   a getter or setter are copied via Object.defineProperty so accessors are preserved.
// - Results are memoised per module in a WeakMap (one map for each truthiness of `t`)
//   when WeakMap exists. The function reassigns itself on first call so the maps are
//   created only once.
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
11
|
+
/**
 * Captures a tall page as a sequence of overlapping, viewport-sized screenshots.
 *
 * The viewport is set to 1200 x `sliceHeight`, the page is scrolled slice by slice,
 * and each slice (after the first) starts `overlap` pixels above where the previous
 * one ended. At most 11 screenshots are taken; longer pages are truncated.
 *
 * @param {object} page - Page-like object exposing `evaluate`, `setViewportSize`,
 *   `waitForTimeout` and `screenshot`.
 * @param {object} [options]
 * @param {number} [options.overlap=200] - Pixels shared between consecutive slices.
 * @param {number} [options.sliceHeight=1000] - Height of each captured slice.
 * @returns {Promise<Array>} The screenshots in top-to-bottom order.
 */
async function captureFullPageImagesWithOverlap(page, options = {}) {
  // Default each field independently so a partial options object (e.g. only
  // `sliceHeight`) does not leave the other field undefined and poison the
  // scroll arithmetic with NaN.
  const { overlap = 200, sliceHeight = 1000 } = options;
  const totalHeight = await page.evaluate(() => document.body.scrollHeight);
  const buffers = [];
  let currentHeight = 0;
  while (currentHeight < totalHeight) {
    // Hard cap on the number of slices; this also bounds the loop when
    // `overlap >= sliceHeight` and the offset would never advance.
    if (buffers.length > 10) {
      // Report how much was actually covered, not the full page height.
      console.info(`the page is too long, only first ${currentHeight} px of the page will be captured.`);
      break;
    }
    await page.setViewportSize({
      width: 1200,
      height: sliceHeight
    });
    // The first slice starts at the very top; later slices back up by `overlap`.
    await page.evaluate(y => window.scrollTo(0, y), currentHeight - (currentHeight > 0 ? overlap : 0));
    // Give the page a moment to settle after scrolling before capturing.
    await page.waitForTimeout(500);
    const buffer = await page.screenshot();
    buffers.push(buffer);
    currentHeight += sliceHeight - overlap;
  }
  return buffers;
}
|
|
35
|
+
/**
 * Produces the screenshot(s) used for image-based extraction.
 *
 * The viewport is temporarily set to 1200x800. When `searchRegionHandler` is given,
 * a single PNG screenshot of that element is returned; otherwise the whole page is
 * captured via `captureFullPageImagesWithOverlap`. The viewport that was active on
 * entry (if any) is restored on every exit path, including errors.
 *
 * @param {object} page - Page-like object exposing `viewportSize` and `setViewportSize`.
 * @param {object} [searchRegionHandler] - Element handle/locator exposing
 *   `boundingBox` and `screenshot`.
 * @returns {Promise<object>} A neverthrow result: `ok(images)` or `err(...)` when the
 *   region has no bounding box.
 */
async function buildImagesFromPageOrHandle(page, searchRegionHandler) {
  const originalViewPortSize = page.viewportSize();
  await page.setViewportSize({
    width: 1200,
    height: 800
  });
  try {
    if (searchRegionHandler) {
      const size = await searchRegionHandler.boundingBox();
      if (!size) {
        // NOTE(review): this branch fires when boundingBox() yields nothing, yet the
        // message talks about the region being too large — confirm the intended wording.
        return (0, _neverthrow.err)(errors.other("the provided search region is very large, image extraction support up to 5000px height."));
      }
      return (0, _neverthrow.ok)([await searchRegionHandler.screenshot({
        type: "png"
      })]);
    }
    const fullPageImages = await captureFullPageImagesWithOverlap(page);
    return (0, _neverthrow.ok)(fullPageImages);
  } finally {
    // Always hand the page back in the state the caller gave it to us.
    if (originalViewPortSize) {
      await page.setViewportSize(originalViewPortSize);
    }
  }
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.cleanupAiResult = cleanupAiResult;
|
|
7
|
+
exports.getResultFromOutputSchema = getResultFromOutputSchema;
|
|
8
|
+
exports.processInputSchema = processInputSchema;
|
|
9
|
+
/**
 * Builds the schema handed to the model from the caller's JSON schema.
 *
 * The top-level `description` is dropped from a deep copy of the input. Non-array
 * schemas are returned as that copy. Array schemas are wrapped in an object holding
 * the array under `entityName` plus a `number_of_<entityName>` counter, both required.
 *
 * @param {object} originalJsonSchema - Caller-supplied schema; never mutated.
 * @param {string} entityName - Name used for the wrapper's property keys.
 * @returns {object} The schema to send to the model.
 */
function processInputSchema(originalJsonSchema, entityName) {
  const internalSchema = structuredClone(originalJsonSchema);
  delete internalSchema.description;

  if (originalJsonSchema.type !== "array") {
    return internalSchema;
  }

  const countKey = `number_of_${entityName}`;
  const itemsKey = `${entityName}`;
  const countSchema = {
    type: "number",
    description: `The number of ${entityName} items in the text - not the overall total. Relay on the text to find this, if the number is not mentioned in the text, this should be null. For example, some lists say 'showing 5 our of 20 items' - 5 is the number of items in the list.`
  };

  return {
    type: "object",
    properties: {
      [countKey]: countSchema,
      [itemsKey]: internalSchema
    },
    required: [countKey, itemsKey]
  };
}
|
|
27
|
+
/**
 * Unwraps the model's parsed output back into the shape the caller asked for.
 *
 * @param {object} originalJsonSchema - The caller's schema; only `type` is inspected.
 * @param {string} entityName - Key under which array items were requested.
 * @param {object} parsedData - Parsed model output.
 * @returns {*} For array schemas, `parsedData[entityName]` (or `[]` when it is
 *   null/undefined); otherwise `parsedData` itself.
 */
function getResultFromOutputSchema(originalJsonSchema, entityName, parsedData) {
  const wantsArray = originalJsonSchema.type === "array";
  return wantsArray ? (parsedData[entityName] ?? []) : parsedData;
}
|
|
34
|
+
/**
 * Replaces every HTML-escaped ampersand (`&amp;`) in a string with a literal `&`.
 *
 * Replacing `&` with `&` would be a no-op, so the escaped entity is matched
 * explicitly.
 *
 * @param {string} str - Text to clean.
 * @returns {string} The text with `&amp;` decoded.
 */
function cleanUpAiReturnedString(str) {
  return str.replaceAll("&amp;", "&");
}
|
|
37
|
+
/**
 * Recursively tidies a value returned by the model.
 *
 * - Strings are passed through `cleanUpAiReturnedString`.
 * - Other primitives and `null` are returned unchanged.
 * - Arrays are mapped element by element (no elements are removed).
 * - Plain objects are rebuilt without entries whose value is `null`, `undefined`
 *   or the empty string; remaining values are cleaned recursively.
 *
 * @param {*} obj - Value to clean.
 * @returns {*} The cleaned value.
 */
function cleanupAiResult(obj) {
  if (typeof obj === "string") {
    return cleanUpAiReturnedString(obj);
  }
  const isContainer = typeof obj === "object" && obj !== null;
  if (!isContainer) {
    return obj;
  }
  if (Array.isArray(obj)) {
    return obj.map(cleanupAiResult);
  }

  const cleaned = {};
  for (const [key, value] of Object.entries(obj)) {
    const isBlank = value === null || value === undefined || value === "";
    if (isBlank) {
      continue;
    }
    cleaned[key] = cleanupAiResult(value);
  }
  return cleaned;
}
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.extractMarkdownFromFile = extractMarkdownFromFile;
|
|
7
|
+
exports.extractStructuredDataFromFile = extractStructuredDataFromFile;
|
|
8
|
+
exports.extractTablesFromFile = extractTablesFromFile;
|
|
9
|
+
var _jwtTokenManager = require("../common/jwtTokenManager");
|
|
10
|
+
var _formatZodError = require("../common/formatZodError");
|
|
11
|
+
var _validators = require("./validators");
|
|
12
|
+
var _getFileTrackingHeaders = require("../common/eventTracking/getFileTrackingHeaders");
|
|
13
|
+
var _environmentVariables = require("../common/environmentVariables");
|
|
14
|
+
var _runtime = require("@intuned/runtime");
|
|
15
|
+
var _zod = require("zod");
|
|
16
|
+
var _Logger = require("../common/Logger");
|
|
17
|
+
var _asyncRetry = _interopRequireDefault(require("async-retry"));
|
|
18
|
+
// Babel interop helper: modules flagged `__esModule` are returned as-is; anything
// else (including falsy values) is wrapped as `{ default: e }`.
function _interopRequireDefault(e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
|
|
19
|
+
// Shape of the optional `usage` payload attached to a finished file operation:
// both counters are numbers and either may be absent.
const usageInfoSchema = (() => {
  const optionalNumber = () => _zod.z.number().optional();
  const pagesProcessed = optionalNumber();
  const aiCostInCents = optionalNumber();
  return _zod.z.object({ pagesProcessed, aiCostInCents });
})();
|
|
23
|
+
/**
 * Makes a file descriptor JSON-safe before it is sent to the backend.
 *
 * Files whose `source.type` is exactly `"buffer"` are returned as a shallow copy
 * whose source is `{ type: "base64", data: <base64 string> }`. Every other file is
 * returned unchanged (same reference).
 *
 * @param {object} file - File descriptor with a `source` of `{ type, data }`.
 * @returns {object} The original file, or a copy carrying base64 data.
 */
const serializeFile = file => {
  if (file.source.type !== "buffer") {
    return file;
  }
  const { data } = file.source;
  // Only Buffer#toString understands the "base64" argument. A plain Uint8Array or
  // ArrayBuffer would silently stringify to something else, so normalise first.
  const bytes = Buffer.isBuffer(data) ? data : Buffer.from(data);
  return {
    ...file,
    source: {
      type: "base64",
      data: bytes.toString("base64")
    }
  };
};
|
|
35
|
+
/**
 * Converts a file to markdown through the backend "toMd" file operation.
 *
 * Validates the file and the label, starts the operation, polls until it settles
 * and returns the `result` field of the final poll response. When the response
 * carries a parsable `usage.pagesProcessed`, it is logged.
 *
 * @param {object} file - File descriptor accepted by `validateFileSchema`.
 * @param {object} options
 * @param {string} options.label - Non-empty label identifying this extraction.
 * @returns {Promise<*>} The `result` field of the final poll response.
 * @throws {Error} When the file or label is invalid, or the backend calls fail.
 */
async function extractMarkdownFromFile(file, options) {
  const fileParsingErrors = (0, _validators.validateFileSchema)(file);
  if (fileParsingErrors && fileParsingErrors.length > 0) {
    const message = `invalid extractMarkdownFromFile input: ${fileParsingErrors.join("\n")}`;
    throw new Error(message);
  }
  // Read the label defensively so a missing `options` argument surfaces as the
  // label validation error below rather than a raw TypeError.
  const label = options === null || options === undefined ? undefined : options.label;
  const identifierParsingResult = _validators.labelSchema.safeParse(label);
  if (identifierParsingResult.success === false) {
    const message = `invalid identifier: ${(0, _formatZodError.formatZodError)(identifierParsingResult.error).join("\n")}`;
    throw new Error(message);
  }
  const operationId = await initiateExtraction({
    operationName: "toMd",
    label,
    requestBody: JSON.stringify({
      file: serializeFile(file)
    })
  });
  const pollingResult = await pollForExtractionResult("toMd", operationId);
  if (pollingResult.usage) {
    // Usage reporting is best-effort: unparsable usage is ignored.
    const usageParsing = usageInfoSchema.safeParse(pollingResult.usage);
    if (usageParsing.success && usageParsing.data.pagesProcessed !== undefined) {
      _Logger.logger.info(`extractMarkdownFromFile with label ${label} used ${usageParsing.data.pagesProcessed} pages in file processing`);
    }
  }
  // NOTE(review): a poll response with status "failed" is returned the same way as
  // a completed one — confirm callers expect that.
  return pollingResult.result;
}
|
|
62
|
+
/**
 * Extracts tables from a file through the backend "extractTables" file operation.
 *
 * Validates the file and the label, starts the operation, polls until it settles
 * and returns the `result` field of the final poll response. When the response
 * carries a parsable `usage.pagesProcessed`, it is logged.
 *
 * @param {object} file - File descriptor accepted by `validateFileSchema`.
 * @param {object} options
 * @param {string} options.label - Non-empty label identifying this extraction.
 * @returns {Promise<*>} The `result` field of the final poll response.
 * @throws {Error} When the file or label is invalid, or the backend calls fail.
 */
async function extractTablesFromFile(file, options) {
  const fileParsingErrors = (0, _validators.validateFileSchema)(file);
  if (fileParsingErrors && fileParsingErrors.length > 0) {
    const message = `invalid extractTablesFromFile input: ${fileParsingErrors.join("\n")}`;
    throw new Error(message);
  }
  // Read the label defensively so a missing `options` argument surfaces as the
  // label validation error below rather than a raw TypeError.
  const label = options === null || options === undefined ? undefined : options.label;
  const identifierParsingResult = _validators.labelSchema.safeParse(label);
  if (identifierParsingResult.success === false) {
    const message = `invalid identifier: ${(0, _formatZodError.formatZodError)(identifierParsingResult.error).join("\n")}`;
    throw new Error(message);
  }
  const operationId = await initiateExtraction({
    operationName: "extractTables",
    label,
    requestBody: JSON.stringify({
      file: serializeFile(file)
    })
  });
  const pollingResult = await pollForExtractionResult("extractTables", operationId);
  if (pollingResult.usage) {
    // Usage reporting is best-effort: unparsable usage is ignored.
    const usageParsing = usageInfoSchema.safeParse(pollingResult.usage);
    if (usageParsing.success && usageParsing.data.pagesProcessed !== undefined) {
      _Logger.logger.info(`extractTablesFromFile with label ${label} used ${usageParsing.data.pagesProcessed} pages in file processing`);
    }
  }
  // NOTE(review): a poll response with status "failed" is returned the same way as
  // a completed one — confirm callers expect that.
  return pollingResult.result;
}
|
|
89
|
+
/**
 * Extracts structured data from a file through the backend "extractData" operation.
 *
 * The file and the options are validated first. The request body is the serialized
 * file merged with the parsed options. After polling, any parsable usage figures
 * (pages processed, AI cost) are logged and the `result` field is returned.
 *
 * @param {object} file - File descriptor accepted by `validateFileSchema`.
 * @param {object} options - Options accepted by `extractDataFromFileOptionsSchema`;
 *   `options.label` identifies the extraction.
 * @returns {Promise<*>} The `result` field of the final poll response.
 * @throws {Error} When the file or options are invalid, or the backend calls fail.
 */
async function extractStructuredDataFromFile(file, options) {
  const fileIssues = (0, _validators.validateFileSchema)(file);
  if (fileIssues && fileIssues.length > 0) {
    throw new Error(`invalid file: ${fileIssues.join("\n")}`);
  }

  const parsedOptions = _validators.extractDataFromFileOptionsSchema.safeParse(options);
  if (!parsedOptions.success) {
    const details = (0, _formatZodError.formatZodError)(parsedOptions.error).join("\n");
    throw new Error(`invalid options: ${details}`);
  }

  const label = options.label;
  const payload = {
    file: serializeFile(file),
    ...parsedOptions.data
  };
  const operationId = await initiateExtraction({
    operationName: "extractData",
    label,
    requestBody: JSON.stringify(payload)
  });

  const pollingResult = await pollForExtractionResult("extractData", operationId);

  // Usage reporting is optional: skipped when absent or not matching the schema.
  const usage = pollingResult.usage ? usageInfoSchema.safeParse(pollingResult.usage) : null;
  if (usage && usage.success) {
    const { pagesProcessed, aiCostInCents } = usage.data;
    if (pagesProcessed !== undefined) {
      _Logger.logger.info(`extractStructuredDataFromFile with label ${label} used ${pagesProcessed} pages in file processing`);
    }
    if (aiCostInCents !== undefined) {
      _Logger.logger.info(`extractStructuredDataFromFile with label ${label} Ai cost is $${aiCostInCents / 100}`);
    }
  }
  return pollingResult.result;
}
|
|
122
|
+
/**
 * Starts a backend file operation and returns its operation id.
 *
 * POSTs `requestBody` to `files/<operationName>/start` with JSON content type plus
 * the file-tracking headers built from the run environment, the label and the
 * current execution context.
 *
 * @param {object} params
 * @param {string} params.operationName - Backend operation segment of the URL.
 * @param {string} params.label - Caller label, sent as the tracking `identifier`.
 * @param {string} params.requestBody - Already-serialized JSON request body.
 * @returns {Promise<*>} The `operationId` field of the JSON response.
 * @throws {Error} On a non-OK HTTP response or when the response has no operation id.
 */
async function initiateExtraction({
  operationName,
  label,
  requestBody
}) {
  // Take a single snapshot of the execution context so all tracking fields are
  // read from the same object instead of four independent lookups.
  const executionContext = (0, _runtime.getExecutionContext)();
  const { runId, jobId, jobRunId, queueId } = executionContext ?? {};
  const response = await (0, _jwtTokenManager.callBackendFunctionWithToken)(`files/${operationName}/start`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      ...(0, _getFileTrackingHeaders.getFilesTrackingHeaders)({
        environment: (0, _environmentVariables.getEnvironmentVariable)("RUN_ENVIRONMENT"),
        runId,
        identifier: label,
        jobId,
        jobRunId,
        queueId
      })
    },
    body: requestBody
  });
  if (!response.ok) {
    throw new Error(`Failed to initiate ${operationName} extraction: HTTP ${response.status} - ${await response.text()}`);
  }
  const responseJson = await response.json();
  const operationId = responseJson === null || responseJson === undefined ? undefined : responseJson.operationId;
  if (!operationId) {
    throw new Error("Failed to get operation ID from workflow response");
  }
  return operationId;
}
|
|
153
|
+
/**
 * Polls the backend until a file operation reports "completed" or "failed".
 *
 * Each attempt GETs `files/<fileOperationType>/<operationId>/check`. A non-OK HTTP
 * response aborts polling through `bail`; any other status than "completed" or
 * "failed" throws so that the retry wrapper schedules another attempt.
 *
 * @param {string} fileOperationType - Operation segment of the check URL.
 * @param {string} operationId - Id returned when the operation was started.
 * @returns {Promise<object>} The JSON body of the settling poll response.
 */
async function pollForExtractionResult(fileOperationType, operationId) {
  // Fixed 3s interval (factor 1, min === max), up to 100 retries.
  const retryPolicy = {
    retries: 100,
    factor: 1,
    minTimeout: 3000,
    maxTimeout: 3000
  };

  const checkOnce = async bail => {
    const checkPath = `files/${fileOperationType}/${operationId}/check`;
    const pollRes = await (0, _jwtTokenManager.callBackendFunctionWithToken)(checkPath, {
      method: "GET",
      headers: {
        "Content-Type": "application/json"
      }
    });

    if (!pollRes.ok) {
      // HTTP-level failures are treated as final: stop retrying.
      bail(new Error(`Failed to check ${fileOperationType} results (non-retryable), ${pollRes.status} ${await pollRes.text()}`));
      return;
    }

    const pollResJson = await pollRes.json();
    const settled = pollResJson.status === "completed" || pollResJson.status === "failed";
    if (!settled) {
      // Throwing is the signal for the retry wrapper to try again.
      throw new Error("Operation not completed yet");
    }
    return pollResJson;
  };

  return (0, _asyncRetry.default)(checkOnce, retryPolicy);
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
Object.defineProperty(exports, "extractMarkdownFromFile", {
|
|
7
|
+
enumerable: true,
|
|
8
|
+
get: function () {
|
|
9
|
+
return _fileExtractors.extractMarkdownFromFile;
|
|
10
|
+
}
|
|
11
|
+
});
|
|
12
|
+
Object.defineProperty(exports, "extractStructuredDataFromContent", {
|
|
13
|
+
enumerable: true,
|
|
14
|
+
get: function () {
|
|
15
|
+
return _extractStructuredData.extractStructuredDataFromContent;
|
|
16
|
+
}
|
|
17
|
+
});
|
|
18
|
+
Object.defineProperty(exports, "extractStructuredDataFromFile", {
|
|
19
|
+
enumerable: true,
|
|
20
|
+
get: function () {
|
|
21
|
+
return _fileExtractors.extractStructuredDataFromFile;
|
|
22
|
+
}
|
|
23
|
+
});
|
|
24
|
+
Object.defineProperty(exports, "extractTablesFromFile", {
|
|
25
|
+
enumerable: true,
|
|
26
|
+
get: function () {
|
|
27
|
+
return _fileExtractors.extractTablesFromFile;
|
|
28
|
+
}
|
|
29
|
+
});
|
|
30
|
+
var _fileExtractors = require("./fileExtractors");
|
|
31
|
+
var _extractStructuredData = require("./extractStructuredData");
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/** Fields shared by every schema node. */
export interface BasicSchema {
  type: string;
  description?: string;
}

/** Schema node for string values. */
export interface StringSchema extends BasicSchema {
  type: "string";
  enum?: string[];
  minLength?: number;
  maxLength?: number;
  pattern?: string;
}

/** Schema node for numeric values (`number` or `integer`). */
export interface NumberSchema extends BasicSchema {
  type: "number" | "integer";
  multipleOf?: number;
  minimum?: number;
  exclusiveMinimum?: number;
  maximum?: number;
  exclusiveMaximum?: number;
}

/** Schema node for boolean values. */
export interface BooleanSchema extends BasicSchema {
  type: "boolean";
}

/** Schema node for arrays; `items` describes every element. */
export interface ArraySchema extends BasicSchema {
  type: "array";
  items: JsonSchema;
  minItems?: number;
  maxItems?: number;
  uniqueItems?: boolean;
}

/** Schema node for objects; `properties` maps each key to its own schema. */
export interface ObjectSchema extends BasicSchema {
  type: "object";
  properties: Record<string, JsonSchema>;
  required?: string[];

  minProperties?: number;
  maxProperties?: number;
}

/** Any schema node supported by the extractors. */
export type JsonSchema =
  | StringSchema
  | NumberSchema
  | BooleanSchema
  | ArraySchema
  | ObjectSchema;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.createOpenAIInstance = createOpenAIInstance;
|
|
7
|
+
var _openai = require("openai");
|
|
8
|
+
var _dotenv = require("dotenv");
|
|
9
|
+
var _jwtTokenManager = require("../../common/jwtTokenManager");
|
|
10
|
+
(0, _dotenv.config)();
|
|
11
|
+
/**
 * Creates an OpenAI client.
 *
 * With a truthy `options.apiKey` the client is built from that key alone. Otherwise
 * the client uses an empty API key, a base URL assembled from the
 * `FUNCTIONS_DOMAIN`, `INTUNED_WORKSPACE_ID` and `INTUNED_INTEGRATION_ID`
 * environment variables, and the token manager's `fetchWithToken` as its fetch.
 *
 * @param {object} [options]
 * @param {string} [options.apiKey] - Key for talking to OpenAI directly.
 * @returns {object} The constructed OpenAI client.
 */
function createOpenAIInstance(options) {
  const hasOptions = options !== null && options !== undefined;
  if (hasOptions && options.apiKey) {
    return new _openai.OpenAI({ apiKey: options.apiKey });
  }

  const {
    FUNCTIONS_DOMAIN,
    INTUNED_WORKSPACE_ID,
    INTUNED_INTEGRATION_ID
  } = process.env;
  const tokenManager = _jwtTokenManager.backendFunctionsTokenManager;

  return new _openai.OpenAI({
    apiKey: "",
    baseURL: `${FUNCTIONS_DOMAIN}/api/${INTUNED_WORKSPACE_ID}/functions/${INTUNED_INTEGRATION_ID}/openai`,
    // Bound so the method keeps its `this` when the client invokes it.
    fetch: tokenManager.fetchWithToken.bind(tokenManager)
  });
}
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.sheetFileSchema = exports.pdfFileSchema = exports.labelSchema = exports.jsonSchema = exports.imageFileSchema = exports.genericExtractDataInputSchema = exports.extractDataInputJsonSchema = exports.extractDataFromFileOptionsSchema = exports.docFileSchema = exports.contentValidationSchema = void 0;
|
|
7
|
+
exports.validateFileSchema = validateFileSchema;
|
|
8
|
+
var _zod = require("zod");
|
|
9
|
+
var _locatorHelpers = require("../common/locatorHelpers");
|
|
10
|
+
var _ajv = _interopRequireDefault(require("ajv"));
|
|
11
|
+
var _formatZodError = require("../common/formatZodError");
|
|
12
|
+
var _aiModelsValidations = require("../common/aiModelsValidations");
|
|
13
|
+
// Babel interop helper: modules flagged `__esModule` are returned as-is; anything
// else (including falsy values) is wrapped as `{ default: e }`.
function _interopRequireDefault(e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
|
|
14
|
+
// Error messages reported when the label is not a string or is missing.
const labelMessages = {
  invalid_type_error: "label must be a string",
  required_error: "label is required"
};
// A label is a non-empty string.
const labelSchema = _zod.z.string(labelMessages).min(1, "label must be at least 1 character long");
exports.labelSchema = labelSchema;
|
|
18
|
+
/**
 * Builds a `{ model, type }` strategy schema: `model` must be one of
 * `supportedModels` and `type` must equal the `strategyType` literal.
 */
const buildStrategySchema = (supportedModels, strategyType) => _zod.z.object({
  model: _zod.z.enum(supportedModels, {
    required_error: "strategy model is required",
    invalid_type_error: "strategy model is invalid"
  }),
  type: _zod.z.literal(strategyType, {
    required_error: "strategy type is required",
    invalid_type_error: "strategy type is invalid"
  })
});
// HTML and MARKDOWN strategies accept the text models; IMAGE accepts the vision models.
const htmlStrategySchema = buildStrategySchema(_aiModelsValidations.SUPPORTED_TEXT_MODELS, "HTML");
const imageStrategySchema = buildStrategySchema(_aiModelsValidations.SUPPORTED_VISION_MODELS, "IMAGE");
const markdownStrategySchema = buildStrategySchema(_aiModelsValidations.SUPPORTED_TEXT_MODELS, "MARKDOWN");
|
|
48
|
+
/**
 * Extraction strategy: either the HTML or the IMAGE strategy object.
 * When the value is omitted it defaults to the HTML strategy with the
 * "claude-3-haiku" model.
 *
 * The custom error map gives a dedicated message for the
 * gpt3.5-turbo + IMAGE combination and otherwise falls back to the issue's
 * own message or a generic one.
 */
const strategySchema = _zod.z.union([htmlStrategySchema, imageStrategySchema], {
  errorMap: (err, context) => {
    // The rejected input may be null (or a primitive); fall back to an empty
    // object so reading `.model` / `.type` can never throw inside the error map.
    const rejected = context.data ?? {};
    const isImageOnGpt35 = rejected.model === "gpt3.5-turbo" && rejected.type === "IMAGE";
    if (err.code === "invalid_union" && isImageOnGpt35) {
      return {
        message: "gpt3.5-turbo does not support IMAGE strategy"
      };
    }
    return {
      message: err.message ?? "invalid strategy configuration"
    };
  }
}).optional().default({
  model: "claude-3-haiku",
  type: "HTML"
});
|
|
64
|
+
// Small factories for the optional keyword validators used repeatedly below.
// Each call returns a fresh zod schema instance.
const optionalNumber = () => _zod.z.number().optional();
const optionalString = () => _zod.z.string().optional();
const optionalNumberAtLeast = lowerBound => _zod.z.number().min(lowerBound).optional();

// Fields shared by every supported schema node.
const basicSchema = _zod.z.object({
  type: _zod.z.string(),
  description: optionalString()
});

// "string" nodes and their keywords.
const stringSchema = basicSchema.extend({
  type: _zod.z.literal("string"),
  enum: _zod.z.array(_zod.z.string()).optional(),
  maxLength: optionalNumber(),
  minLength: optionalNumber(),
  pattern: optionalString()
});

// "number" / "integer" nodes and their keywords.
const numberSchema = basicSchema.extend({
  type: _zod.z.union([_zod.z.literal("number"), _zod.z.literal("integer")]),
  multipleOf: optionalNumber(),
  maximum: optionalNumber(),
  exclusiveMaximum: optionalNumber(),
  minimum: optionalNumber(),
  exclusiveMinimum: optionalNumber()
});

// "boolean" nodes carry no extra keywords.
const booleanSchema = basicSchema.extend({
  type: _zod.z.literal("boolean")
});

// "array" nodes; `items` is resolved lazily because `jsonSchema` is declared further down.
const arraySchema = basicSchema.extend({
  type: _zod.z.literal("array"),
  items: _zod.z.lazy(() => jsonSchema),
  maxItems: optionalNumberAtLeast(0),
  minItems: optionalNumberAtLeast(0),
  uniqueItems: _zod.z.boolean().optional()
});

// "object" nodes; property schemas are resolved lazily for the same reason as `items`.
const objectSchema = basicSchema.extend({
  type: _zod.z.literal("object"),
  properties: _zod.z.record(_zod.z.string(), _zod.z.lazy(() => jsonSchema)),
  required: _zod.z.array(_zod.z.string()),
  maxProperties: optionalNumberAtLeast(1),
  minProperties: optionalNumberAtLeast(1)
});

// Any supported schema node.
const jsonSchema = _zod.z.union([stringSchema, numberSchema, booleanSchema, arraySchema, objectSchema]);
exports.jsonSchema = jsonSchema;
|
|
101
|
+
// True when Ajv's `validateSchema` (called in throwing mode) accepts the candidate.
const compilesAsJsonSchema = candidate => {
  try {
    new _ajv.default({}).validateSchema(candidate, true);
  } catch (e) {
    return false;
  }
  return true;
};

// Builds the error message for a candidate that failed `compilesAsJsonSchema`:
// re-runs validation to capture Ajv's error text and keeps only the first
// reported problem, rewritten from a slash path to a dotted path.
const describeSchemaFailure = candidate => {
  try {
    const strictAjv = new _ajv.default({
      strict: true,
      strictRequired: true
    });
    if (candidate) {
      strictAjv.validateSchema(candidate, true);
    }
  } catch (e) {
    const [firstProblem] = e.message.replace("schema is invalid: ", "").split(", ");
    return {
      message: firstProblem.replace("data/", "").replaceAll("/", ".")
    };
  }
  return {
    message: "invalid data schema"
  };
};

// Fails only for a truthy value whose `type` is neither "object" nor "array".
const hasObjectOrArrayRoot = v => !(v && v.type !== "object" && v.type !== "array");

// Truthy values that are not of type "object" pass; everything else must have `properties`.
const objectDeclaresProperties = v => v && v.type !== "object" ? true : Boolean(v != null && v.properties);

// Truthy values that are not of type "array" pass; everything else must have `items`.
const arrayDeclaresItems = v => v && v.type !== "array" ? true : Boolean(v != null && v.items);

// Validation applied to user-supplied data schemas: an Ajv schema check
// followed by three structural checks on the root node.
const jsonSchemaCustomValidation = _zod.z.any()
  .refine(compilesAsJsonSchema, describeSchemaFailure)
  .refine(hasObjectOrArrayRoot, {
    message: "root schema must be an object or an array"
  })
  .refine(objectDeclaresProperties, {
    message: "object schema must have properties"
  })
  .refine(arrayDeclaresItems, {
    message: "array schema must have items"
  });
|
|
147
|
+
// Optional search region; when present it must satisfy `isLocator`.
const searchRegionSchema = _zod.z.any().refine(_locatorHelpers.isLocator, {
  message: "invalid search region, search region must be a playwright locator"
}).optional();

// Input accepted by the structured-data extraction entry point.
const extractDataInputJsonSchema = _zod.z.object({
  label: labelSchema,
  dataSchema: jsonSchemaCustomValidation,
  strategy: strategySchema,
  prompt: _zod.z.string().optional(),
  searchRegion: searchRegionSchema,
  apiKey: _zod.z.string().optional()
});
exports.extractDataInputJsonSchema = extractDataInputJsonSchema;
|
|
157
|
+
// Builds a `{ type, data }` source schema tagged with the given literal.
const typedSourceSchema = (tag, dataSchema) => _zod.z.object({
  type: _zod.z.literal(tag),
  data: dataSchema
});
const base64Schema = typedSourceSchema("base64", _zod.z.string());
const bufferSchema = typedSourceSchema("buffer", _zod.z.instanceof(Buffer));
const urlSchema = typedSourceSchema("url", _zod.z.string().url());

// Replaces the union mismatch error with a single readable message;
// any other issue keeps its own message (or an empty string).
const describeSourceIssue = issue => issue.code === "invalid_union" ? {
  message: "source must be a valid base64, buffer or url"
} : {
  message: issue.message ?? ""
};

// A file source is one of: base64 string, URL, or Buffer.
const sourceSchema = _zod.z.union([base64Schema, urlSchema, bufferSchema], {
  errorMap: describeSourceIssue
});
|
|
181
|
+
// Optional list of page numbers.
const pagesSchema = _zod.z.array(_zod.z.number()).optional();

// PDF file: optional page selection plus a source.
const pdfFileSchema = _zod.z.object({
  type: _zod.z.literal("pdf"),
  pages: pagesSchema,
  source: sourceSchema
});
exports.pdfFileSchema = pdfFileSchema;

// Image file: source only.
const imageFileSchema = _zod.z.object({
  type: _zod.z.literal("image"),
  source: sourceSchema
});
exports.imageFileSchema = imageFileSchema;

// Spreadsheet file: requires the sheet name.
const sheetFileSchema = _zod.z.object({
  type: _zod.z.literal("spreadsheet"),
  sheetName: _zod.z.string(),
  source: sourceSchema
});
exports.sheetFileSchema = sheetFileSchema;

// Document file: optional page selection plus a source.
const docFileSchema = _zod.z.object({
  type: _zod.z.literal("document"),
  pages: pagesSchema,
  source: sourceSchema
});
exports.docFileSchema = docFileSchema;

// Every supported file schema, in lookup order.
const fileSchemas = [pdfFileSchema, imageFileSchema, sheetFileSchema, docFileSchema];
|
|
202
|
+
/**
 * Validates a file descriptor against the schema whose `type` literal
 * matches `data.type`.
 *
 * @param {*} data - the file descriptor to validate.
 * @returns {string[] | undefined} a list of error messages, or `undefined`
 *   when the descriptor is valid. (The list for a schema failure is whatever
 *   `formatZodError` returns.)
 */
function validateFileSchema(data) {
  if (!data) {
    return [`"file" is required`];
  }

  // Pick the schema by its `type` literal first so errors are reported
  // against the intended file kind only.
  const matchingSchema = fileSchemas.find(candidate => candidate.shape.type.safeParse(data.type).success);
  if (matchingSchema === undefined) {
    const allowedTypes = fileSchemas.map(candidate => candidate.shape.type.value).join(" or ");
    return [`data.type must be either ${allowedTypes}, got ${data.type}`];
  }

  const outcome = matchingSchema.safeParse(data);
  return outcome.success ? undefined : (0, _formatZodError.formatZodError)(outcome.error);
}
|
|
216
|
+
// File extraction accepts the MARKDOWN or IMAGE strategy; the field is optional.
const fileStrategySchema = _zod.z.union([markdownStrategySchema, imageStrategySchema]).optional();

// Options accepted when extracting data from a file.
const extractDataFromFileOptionsSchema = _zod.z.object({
  dataSchema: jsonSchemaCustomValidation,
  label: labelSchema,
  prompt: _zod.z.string().optional(),
  strategy: fileStrategySchema
});
exports.extractDataFromFileOptionsSchema = extractDataFromFileOptionsSchema;
|
|
222
|
+
// Image formats accepted for image content items; each call returns a fresh enum schema.
const supportedImageType = () => _zod.z.enum(["png", "jpeg", "gif", "webp"]);

// Plain text content.
const textContentSchema = _zod.z.object({
  type: _zod.z.literal("text"),
  data: _zod.z.string()
});

// Image content supplied as a Buffer.
const imageBufferContentSchema = _zod.z.object({
  type: _zod.z.literal("image-buffer"),
  data: _zod.z.instanceof(Buffer),
  image_type: supportedImageType()
});

// Image content supplied as a URL.
const imageUrlContentSchema = _zod.z.object({
  type: _zod.z.literal("image-url"),
  data: _zod.z.string().url(),
  image_type: supportedImageType()
});

// A single content item: text, image buffer, or image URL.
const contentItemValidationSchema = _zod.z.union([textContentSchema, imageBufferContentSchema, imageUrlContentSchema]);

// Content is either a non-empty array of items or a single item.
const contentListSchema = _zod.z.array(contentItemValidationSchema).min(1, "content should have at least one item");
const contentValidationSchema = _zod.z.union([contentListSchema, contentItemValidationSchema]);
exports.contentValidationSchema = contentValidationSchema;
|
|
235
|
+
// Input for generic extraction: a data schema, an optional prompt,
// and a required model taken from the supported text models.
const genericExtractDataInputSchema = _zod.z.object({
  dataSchema: jsonSchemaCustomValidation,
  prompt: _zod.z.string().optional(),
  model: _zod.z.enum(_aiModelsValidations.SUPPORTED_TEXT_MODELS)
});
exports.genericExtractDataInputSchema = genericExtractDataInputSchema;
|