@intuned/browser-dev 2.2.3-unify-sdks.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc +21 -0
- package/.eslintignore +10 -0
- package/.eslintrc.js +39 -0
- package/LICENSE +43 -0
- package/dist/ai-extractors/AnthropicClient/index.js +23 -0
- package/dist/ai-extractors/export.d.js +5 -0
- package/dist/ai-extractors/export.d.ts +422 -0
- package/dist/ai-extractors/extractStructuredData.js +79 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/constants.js +7 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/errors.js +42 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingClaude.js +149 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingGoogle.js +37 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStructuredDataUsingOpenAi.js +144 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/extractStrucutredDataUsingAiInstance.js +123 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/index.js +55 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/isItemTableHeaderOrFooter.js +96 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/screenshotHelpers.js +55 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/types.js +5 -0
- package/dist/ai-extractors/extractionHelpers/extractStructuredDataUsingAi/utils.js +53 -0
- package/dist/ai-extractors/extractionHelpers/types.js +5 -0
- package/dist/ai-extractors/fileExtractors.js +176 -0
- package/dist/ai-extractors/index.js +31 -0
- package/dist/ai-extractors/jsonSchema.d.js +5 -0
- package/dist/ai-extractors/jsonSchema.d.ts +49 -0
- package/dist/ai-extractors/openAiClients/index.js +23 -0
- package/dist/ai-extractors/validators.js +239 -0
- package/dist/browser/ai/export.d.js +3 -0
- package/dist/browser/ai/export.d.ts +587 -0
- package/dist/browser/ai/extractMarkdown.js +15 -0
- package/dist/browser/ai/extractStructuredData.js +231 -0
- package/dist/browser/ai/extractStructuredDataUsingAi.js +140 -0
- package/dist/browser/ai/extractionHelpers/screenshotHelpers.js +55 -0
- package/dist/browser/ai/extractionHelpers/validateSchema.js +148 -0
- package/dist/browser/ai/index.d.ts +587 -0
- package/dist/browser/ai/index.js +19 -0
- package/dist/browser/ai/isPageLoaded.js +67 -0
- package/dist/browser/ai/prompt.js +39 -0
- package/dist/browser/ai/tests/testCheckAllTypesAreStrings.spec.js +143 -0
- package/dist/browser/ai/tests/testExtractStructuredData.spec.js +622 -0
- package/dist/browser/ai/tools/index.js +48 -0
- package/dist/browser/ai/types/errors.js +67 -0
- package/dist/browser/ai/types/models.js +45 -0
- package/dist/browser/ai/types/types.js +48 -0
- package/dist/browser/ai/validators.js +136 -0
- package/dist/common/Logger/index.js +60 -0
- package/dist/common/Logger/types.js +5 -0
- package/dist/common/SdkError.js +50 -0
- package/dist/common/aiModelsValidations.js +50 -0
- package/dist/common/browser_scripts.js +2596 -0
- package/dist/common/ensureBrowserScripts.js +17 -0
- package/dist/common/environmentVariables.js +16 -0
- package/dist/common/eventTracking/getAiTrackingHeaders.js +31 -0
- package/dist/common/eventTracking/getFileTrackingHeaders.js +23 -0
- package/dist/common/extendedTest.js +148 -0
- package/dist/common/extractionHelpers.js +19 -0
- package/dist/common/formatZodError.js +18 -0
- package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
- package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
- package/dist/common/fuzzySearch/utils.js +23 -0
- package/dist/common/getModelProvider.js +18 -0
- package/dist/common/getSimplifiedHtml.js +122 -0
- package/dist/common/hashObject.js +32 -0
- package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
- package/dist/common/html2markdown/index.js +19 -0
- package/dist/common/jwtTokenManager.js +18 -0
- package/dist/common/loadRuntime.js +16 -0
- package/dist/common/locatorHelpers.js +41 -0
- package/dist/common/matching/collectStrings.js +32 -0
- package/dist/common/matching/levenshtein.js +40 -0
- package/dist/common/matching/matching.js +317 -0
- package/dist/common/matching/types.js +1 -0
- package/dist/common/noEmpty.js +9 -0
- package/dist/common/saveSnapshotWithExamples.js +60 -0
- package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
- package/dist/common/xpathMapping.js +107 -0
- package/dist/helpers/downloadFile.js +125 -0
- package/dist/helpers/export.d.js +1 -0
- package/dist/helpers/export.d.ts +1294 -0
- package/dist/helpers/extractMarkdown.js +35 -0
- package/dist/helpers/filterEmptyValues.js +54 -0
- package/dist/helpers/gotoUrl.js +93 -0
- package/dist/helpers/index.d.ts +1294 -0
- package/dist/helpers/index.js +115 -0
- package/dist/helpers/processDate.js +25 -0
- package/dist/helpers/resolveUrl.js +63 -0
- package/dist/helpers/sanitizeHtml.js +73 -0
- package/dist/helpers/saveFileToS3.js +46 -0
- package/dist/helpers/scrollToLoadContent.js +50 -0
- package/dist/helpers/tests/extendedTest.js +130 -0
- package/dist/helpers/tests/testDownloadFile.spec.js +197 -0
- package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
- package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
- package/dist/helpers/tests/testIsPageLoaded.spec.js +285 -0
- package/dist/helpers/tests/testProcessDate.spec.js +13 -0
- package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
- package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
- package/dist/helpers/tests/testSimplifyHtml.spec.js +251 -0
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +380 -0
- package/dist/helpers/tests/testWaitForDomSettled.spec.js +169 -0
- package/dist/helpers/tests/testWaitForNetworkIdle.spec.js +115 -0
- package/dist/helpers/types/Attachment.js +81 -0
- package/dist/helpers/types/CustomTypeRegistry.js +48 -0
- package/dist/helpers/types/RunEnvironment.js +18 -0
- package/dist/helpers/types/ValidationError.js +17 -0
- package/dist/helpers/types/index.js +51 -0
- package/dist/helpers/uploadFileToS3.js +153 -0
- package/dist/helpers/utils/getS3Client.js +21 -0
- package/dist/helpers/utils/index.js +73 -0
- package/dist/helpers/utils/isDownload.js +10 -0
- package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
- package/dist/helpers/utils/isLocator.js +9 -0
- package/dist/helpers/utils/jwtTokenManager.js +18 -0
- package/dist/helpers/validateDataUsingSchema.js +119 -0
- package/dist/helpers/waitForDomSettled.js +182 -0
- package/dist/helpers/waitForNetworkIdle.js +191 -0
- package/dist/index.d.js +82 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.js +84 -0
- package/dist/intunedServices/ApiGateway/aiApiGateway.js +87 -0
- package/dist/intunedServices/ApiGateway/factory.js +13 -0
- package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
- package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
- package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
- package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +221 -0
- package/dist/intunedServices/ApiGateway/types.js +11 -0
- package/dist/intunedServices/cache/cache.js +61 -0
- package/dist/intunedServices/cache/index.js +12 -0
- package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
- package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
- package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +149 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +145 -0
- package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
- package/dist/optimized-extractors/common/findTableHeaders.js +175 -0
- package/dist/optimized-extractors/common/index.js +55 -0
- package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +97 -0
- package/dist/optimized-extractors/common/matching/matching.js +212 -0
- package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
- package/dist/optimized-extractors/common/matching/types.js +18 -0
- package/dist/optimized-extractors/common/matching/utils.js +184 -0
- package/dist/optimized-extractors/common/utils.js +58 -0
- package/dist/optimized-extractors/export.d.js +5 -0
- package/dist/optimized-extractors/export.d.ts +397 -0
- package/dist/optimized-extractors/extractArray.js +120 -0
- package/dist/optimized-extractors/extractObject.js +104 -0
- package/dist/optimized-extractors/index.d.ts +397 -0
- package/dist/optimized-extractors/index.js +31 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +312 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
- package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +152 -0
- package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
- package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
- package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +240 -0
- package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
- package/dist/optimized-extractors/models/anthropicModel.js +23 -0
- package/dist/optimized-extractors/models/openaiModel.js +23 -0
- package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
- package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
- package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
- package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
- package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
- package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
- package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
- package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
- package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
- package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
- package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
- package/dist/optimized-extractors/types/errors.js +42 -0
- package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
- package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
- package/dist/optimized-extractors/types/types.js +5 -0
- package/dist/optimized-extractors/validators.js +152 -0
- package/dist/vite-env.d.js +1 -0
- package/dist/vite-env.d.ts +9 -0
- package/docs.md +14 -0
- package/how-to-run-tests.md +10 -0
- package/intuned-runtime-setup.md +13 -0
- package/package.json +124 -0
- package/tsconfig.eslint.json +5 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.extractStructuredData = void 0;
|
|
7
|
+
var _extractStructuredDataUsingAi = require("./extractStructuredDataUsingAi");
|
|
8
|
+
var _validators = require("./validators");
|
|
9
|
+
var _screenshotHelpers = require("./extractionHelpers/screenshotHelpers");
|
|
10
|
+
var _formatZodError = require("../../common/formatZodError");
|
|
11
|
+
var _cache = require("../../intunedServices/cache/cache");
|
|
12
|
+
var _locatorHelpers = require("../../common/locatorHelpers");
|
|
13
|
+
var _extractionHelpers = require("../../common/extractionHelpers");
|
|
14
|
+
var _getSimplifiedHtml = require("../../common/getSimplifiedHtml");
|
|
15
|
+
var _hashObject = require("../../common/hashObject");
|
|
16
|
+
var _Logger = require("../../common/Logger");
|
|
17
|
+
var _helpers = require("../../helpers");
|
|
18
|
+
var _xpathMapping = require("../../common/xpathMapping");
|
|
19
|
+
/**
 * Computes the cache key for one extraction request.
 *
 * The key always covers the page URL, schema, strategy, model, prompt and (for
 * locator sources) the locator's string form. The content fingerprint is only
 * computed and mixed in when DOM matching is off; with DOM matching on, cached
 * entries are checked against the live page through their XPath mapping instead
 * (see findCachedExtraction).
 */
const buildExtractionCacheKey = async ({
  validatedData,
  pageObject,
  pageOrLocator,
  isPageInput,
  getContentFingerprint
}) => {
  const contentFingerprint = validatedData.enableDomMatching ? {} : await getContentFingerprint();
  // Property order is kept stable on purpose in case hashObject is order-sensitive.
  return (0, _hashObject.hashObject)({
    pageUrl: pageObject.url(),
    dataSchema: validatedData.dataSchema,
    strategy: validatedData.strategy,
    model: validatedData.model,
    prompt: validatedData.prompt,
    searchRegion: !isPageInput ? pageOrLocator.toString() : undefined,
    ...contentFingerprint
  }, true);
};

/**
 * Looks up a cached extraction.
 *
 * Returns `{ found: true, value }` on a usable hit and `{ found: false }` otherwise.
 * Without DOM matching any truthy cache entry is a hit. With DOM matching the entry
 * must carry a `matchesMapping` that validateXPathMapping accepts for the current page.
 */
const findCachedExtraction = async ({
  cacheKey,
  enableDomMatching,
  pageObject,
  cacheHitMessages
}) => {
  const cachedResult = await _cache.cache.get(cacheKey);
  if (!cachedResult) {
    return { found: false };
  }
  if (!enableDomMatching) {
    _Logger.logger.info(cacheHitMessages.plain);
    return { found: true, value: cachedResult };
  }
  if (!cachedResult.matchesMapping) {
    return { found: false };
  }
  const isValid = await (0, _xpathMapping.validateXPathMapping)(pageObject, cachedResult.matchesMapping);
  if (!isValid) {
    return { found: false };
  }
  _Logger.logger.info(cacheHitMessages.domMatched);
  return { found: true, value: cachedResult.result };
};

/**
 * Stores an extraction in the cache. With DOM matching the XPath mapping is stored
 * next to the result so a later hit can be validated; otherwise only the result is stored.
 */
const storeExtractionInCache = async ({ cacheKey, enableDomMatching, extraction }) => {
  const entry = enableDomMatching ? {
    result: extraction.result,
    matchesMapping: extraction.xpathMapping || {}
  } : extraction.result;
  await _cache.cache.set(cacheKey, entry);
};

/**
 * Gathers everything that differs between strategies: the text content sent to the
 * model, the images, the content fingerprint used in the cache key, and the log
 * messages emitted on a cache hit.
 *
 * @throws {Error} When the region has no HTML content, or the strategy is not
 *   HTML, IMAGE or MARKDOWN.
 */
const prepareExtractionInput = async ({ strategy, pageOrLocator, pageObject, isPageInput }) => {
  const readPageHtml = () => pageObject.evaluate(() => document.documentElement.outerHTML);
  const multilineCacheHitMessages = {
    domMatched: "Cached results matched correctly with the data in the current page.\nReturning cached result",
    plain: "Results for the extractor found in the cache.\nReturning cached result"
  };

  if (strategy === "HTML") {
    const containerHandle = isPageInput ? await pageOrLocator.locator("html").elementHandle() : await pageOrLocator.elementHandle();
    if (!containerHandle) {
      throw new Error("No HTML content found in the specified region.");
    }
    const simplifiedHtml = await (0, _getSimplifiedHtml.getSimplifiedHtml)(containerHandle);
    return {
      content: simplifiedHtml,
      resolveImages: () => [],
      getContentFingerprint: async () => ({
        html: (0, _extractionHelpers.compressStringSpaces)(simplifiedHtml)
      }),
      cacheHitMessages: {
        domMatched: "Cached results matched correctly with the current page, returning cached result",
        plain: "Results for the extractor found in the cache, returning cached result"
      }
    };
  }

  if (strategy === "IMAGE") {
    const containerHandle = isPageInput ? undefined : await pageOrLocator.elementHandle();
    const images = await (0, _screenshotHelpers.buildImagesFromPageOrHandle)(pageObject, containerHandle);
    return {
      content: "Extract structured data from the following images.",
      // A screenshot failure is only raised once the cache has been consulted.
      resolveImages: () => {
        if (images.isErr()) {
          throw new Error(images.error.context);
        }
        return images.value;
      },
      getContentFingerprint: async () => ({
        html: await readPageHtml()
      }),
      cacheHitMessages: multilineCacheHitMessages
    };
  }

  if (strategy === "MARKDOWN") {
    const containerHandle = isPageInput ? await pageOrLocator.locator("html").elementHandle() : await pageOrLocator.elementHandle();
    const html = containerHandle ? await containerHandle.innerHTML() : undefined;
    if (!html) {
      throw new Error("No HTML content found in the specified region.");
    }
    // NOTE(review): the markdown is generated from the whole page even when `source`
    // is a locator — confirm whether extractMarkdown should receive the locator instead.
    const markdown = await (0, _helpers.extractMarkdown)({
      source: pageObject
    });
    return {
      content: markdown,
      resolveImages: () => [],
      getContentFingerprint: async () => ({
        html: await readPageHtml(),
        markdown
      }),
      cacheHitMessages: multilineCacheHitMessages
    };
  }

  throw new Error(`Unsupported strategy type: ${strategy}. Supported types are: HTML, IMAGE, and MARKDOWN.`);
};

/**
 * Extracts structured data from a page or locator with an AI model.
 *
 * @param {object} options
 * @param {object} options.source - Page or locator to extract from.
 * @param {string} options.strategy - "HTML", "IMAGE" or "MARKDOWN".
 * @param {object} options.dataSchema - Schema describing the data to extract.
 * @param {*} [options.model] - Model identifier forwarded to the AI extractor.
 * @param {string} [options.prompt] - Extra instructions forwarded to the AI extractor.
 * @param {string} [options.apiKey] - API key forwarded to the AI extractor.
 * @param {boolean} [options.enableDomMatching=false] - Match extracted strings with the
 *   DOM; requires every schema field to be a string.
 * @param {number} [options.maxRetries=3] - Retry budget forwarded to the AI extractor.
 * @param {boolean} [options.enableCache=true] - Read from and write to the extraction cache.
 * @returns {Promise<*>} The extracted (or cached) data.
 * @throws {Error} On invalid options, missing content, an unsupported strategy, or a
 *   failed extraction.
 */
const extractStructuredData = async options => {
  const pageOrLocator = options.source;
  const isPageInput = (0, _locatorHelpers.isPage)(pageOrLocator);
  const {
    model,
    strategy,
    prompt,
    apiKey,
    enableDomMatching = false,
    maxRetries = 3,
    dataSchema,
    enableCache = true
  } = options;
  const inputParsingResult = await _validators.extractDataInputJsonSchema.safeParseAsync({
    source: pageOrLocator,
    model,
    strategy,
    prompt,
    apiKey,
    enableDomMatching,
    enableCache,
    maxRetries,
    dataSchema
  });
  if (!inputParsingResult.success) {
    const errors = (0, _formatZodError.formatZodError)(inputParsingResult.error);
    throw new Error(`invalid input parameters for extractStructuredData: ${errors}`);
  }
  // From here on only the validated values are used, for every strategy alike.
  const validatedData = inputParsingResult.data;
  const pageObject = isPageInput ? pageOrLocator : pageOrLocator.page();
  if (validatedData.enableDomMatching && !(0, _validators.checkAllTypesAreStrings)(validatedData.dataSchema)) {
    throw new Error("For DOM matching, all types of the extraction fields must be STRINGS, to match with the DOM.");
  }

  const extractionInput = await prepareExtractionInput({
    strategy: validatedData.strategy,
    pageOrLocator,
    pageObject,
    isPageInput
  });

  let cacheKey = "";
  if (validatedData.enableCache) {
    cacheKey = await buildExtractionCacheKey({
      validatedData,
      pageObject,
      pageOrLocator,
      isPageInput,
      getContentFingerprint: extractionInput.getContentFingerprint
    });
    const cached = await findCachedExtraction({
      cacheKey,
      enableDomMatching: validatedData.enableDomMatching,
      pageObject,
      cacheHitMessages: extractionInput.cacheHitMessages
    });
    if (cached.found) {
      return cached.value;
    }
  }

  const images = extractionInput.resolveImages();
  const result = await (0, _extractStructuredDataUsingAi.extractStructuredDataUsingAi)(pageObject, {
    apiKey: validatedData.apiKey,
    enableDomMatching: validatedData.enableDomMatching,
    jsonSchema: validatedData.dataSchema,
    model: validatedData.model,
    content: extractionInput.content,
    prompt: validatedData.prompt,
    images,
    maxRetries: validatedData.maxRetries
  });
  if (result.isErr()) {
    throw new Error(result.error.context);
  }
  if (validatedData.enableCache) {
    await storeExtractionInCache({
      cacheKey,
      enableDomMatching: validatedData.enableDomMatching,
      extraction: result.value
    });
  }
  return result.value.result;
};
|
|
231
|
+
exports.extractStructuredData = extractStructuredData;
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.extractStructuredDataUsingAi = extractStructuredDataUsingAi;
|
|
7
|
+
var _neverthrow = require("neverthrow");
|
|
8
|
+
var Errors = _interopRequireWildcard(require("./types/errors"));
|
|
9
|
+
var _getAiTrackingHeaders = require("../../common/eventTracking/getAiTrackingHeaders");
|
|
10
|
+
var _environmentVariables = require("../../common/environmentVariables");
|
|
11
|
+
var _Logger = require("../../common/Logger");
|
|
12
|
+
var _collectStrings = require("../../common/matching/collectStrings");
|
|
13
|
+
var _matching = require("../../common/matching/matching");
|
|
14
|
+
var _validateSchema = require("./extractionHelpers/validateSchema");
|
|
15
|
+
var _factory = require("../../intunedServices/ApiGateway/factory");
|
|
16
|
+
var _tools = require("./tools");
|
|
17
|
+
var _prompt = require("./prompt");
|
|
18
|
+
var _ai = require("ai");
|
|
19
|
+
var _loadRuntime = require("../../common/loadRuntime");
|
|
20
|
+
// Generated interop helper used for namespace-style imports (`import * as X`).
// - Returns `e` unchanged when `t` is falsy and `e.__esModule` is set.
// - Otherwise builds a null-prototype namespace object whose `default` is `e` and
//   copies every own property except "default", re-defining accessors via their descriptors.
// - Results are memoised per input in one of two WeakMaps (selected by `t`) when WeakMap exists.
// - The function reassigns itself on first call so the WeakMaps are created only once.
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
21
|
+
/**
 * Runs one AI extraction: asks the model to call the `extract_data` tool, validates
 * the tool input against `jsonSchema`, and re-asks the model with the validation
 * errors until the data is valid or `maxRetries` attempts have been used.
 *
 * @param {object} page - Page used for DOM matching of the extracted strings.
 * @param {object} input
 * @param {string} [input.apiKey] - Passed to the gateway when resolving the model.
 * @param {boolean} input.enableDomMatching - When true, extracted strings are replaced
 *   with their best matches on the page and an XPath mapping is returned.
 * @param {object} input.jsonSchema - Schema the tool input must satisfy.
 * @param {*} input.model - Model identifier resolved through the AI gateway.
 * @param {string} input.content - Text content placed in the prompt messages.
 * @param {string} [input.prompt] - Extra instructions placed in the prompt messages.
 * @param {Array} input.images - Images placed in the prompt messages.
 * @param {number} [input.maxRetries=3] - Maximum validation attempts; also forwarded
 *   to `generateText` as its own `maxRetries`.
 * @returns {Promise<object>} A neverthrow result: `ok({ result, usage, xpathMapping })`
 *   where `usage` is the summed `totalTokens`, or `err(...)` on failure.
 */
async function extractStructuredDataUsingAi(page, input) {
  const {
    apiKey,
    enableDomMatching,
    jsonSchema,
    model,
    content,
    prompt,
    images,
    maxRetries = 3
  } = input;
  let accumulatedCost = 0;
  const getExecutionContext = await (0, _loadRuntime.loadRuntime)();
  const toolName = "extract_data";
  // Read the execution context once; it may be null/undefined.
  const executionContext = getExecutionContext() ?? {};
  const headers = (0, _getAiTrackingHeaders.getAiTrackingHeaders)({
    environment: (0, _environmentVariables.getEnvironmentVariable)("RUN_ENVIRONMENT"),
    type: "DYNAMIC_LIST",
    runId: executionContext.runId || undefined,
    jobId: executionContext.jobId || undefined,
    jobRunId: executionContext.jobRunId || undefined
  });
  const gateway = _factory.GatewayFactory.createAIGateway();
  const gatewayModel = gateway.getModel(model, apiKey);
  const tools = (0, _tools.getTools)(toolName, jsonSchema);
  // Validation feedback is appended to this list, so every retry sees the full conversation.
  const messagesHistory = (0, _prompt.getMessages)({
    prompt,
    content,
    images,
    enableDomMatching
  });
  let currentRetry = 0;
  while (currentRetry < maxRetries) {
    try {
      const result = await (0, _ai.generateText)({
        model: gatewayModel,
        messages: messagesHistory,
        tools: tools.isOk() ? tools.value : {},
        toolChoice: "required",
        maxRetries,
        headers
      });
      accumulatedCost += result.usage?.totalTokens ?? 0;
      _Logger.logger.info(`AI extraction cost: ${accumulatedCost}`);

      const [toolCall] = result.toolCalls ?? [];
      if (!toolCall) {
        // Without this guard the failure surfaced as an opaque
        // "Cannot read properties of null" message.
        const message = "AI model response did not include a tool call";
        _Logger.logger.error("Error during AI extraction", {
          error: message,
          model
        });
        return (0, _neverthrow.err)(Errors.invalidExtractionResult(message));
      }

      let extractedData = toolCall.input;
      // Array schemas arrive wrapped in an `extracted_data` property.
      if (jsonSchema.type === "array" && extractedData.extracted_data) {
        extractedData = extractedData.extracted_data;
      }

      const errors = (0, _validateSchema.validateToolCallSchema)(extractedData, jsonSchema);
      if (errors.length > 0) {
        // Feed the validation errors back as a failed tool call and try again.
        const reaskMessage = (0, _validateSchema.createReaskMessage)(errors);
        const modelMessages = (0, _ai.convertToModelMessages)([{
          role: "assistant",
          parts: [{
            type: "step-start"
          }, {
            type: "text",
            text: result.text,
            state: "done"
          }, {
            type: `tool-${toolName}`,
            state: "output-error",
            toolCallId: toolCall.toolCallId,
            input: extractedData,
            errorText: reaskMessage
          }]
        }]);
        messagesHistory.push(...modelMessages);
        currentRetry++;
        continue;
      }

      if (!enableDomMatching) {
        return (0, _neverthrow.ok)({
          result: extractedData,
          usage: accumulatedCost,
          xpathMapping: {}
        });
      }

      const stringsToMatch = (0, _collectStrings.collectStrings)({
        dataStructure: extractedData
      });
      if (!stringsToMatch || stringsToMatch.length === 0) {
        return (0, _neverthrow.ok)({
          result: [],
          usage: accumulatedCost,
          xpathMapping: {}
        });
      }

      const {
        replacements,
        xpathMapping
      } = await (0, _matching.replaceWithBestMatches)({
        stringsToMatch,
        pageObject: page
      });
      // Strings without a match are mapped to null.
      const stringReplacements = {};
      for (const [key, value] of Object.entries(replacements)) {
        stringReplacements[key] = value?.matchText || null;
      }
      const matchesData = await (0, _validateSchema.recursivelyReplaceStrings)(extractedData, stringReplacements);
      return (0, _neverthrow.ok)({
        result: matchesData,
        usage: accumulatedCost,
        xpathMapping
      });
    } catch (error) {
      _Logger.logger.error("Error during AI extraction", {
        error,
        model
      });
      return (0, _neverthrow.err)(Errors.invalidExtractionResult(error instanceof Error ? error.message : "Unknown error during extraction"));
    }
  }
  return (0, _neverthrow.err)(Errors.maxRetriesExceeded(`Max retries of ${maxRetries} exceeded for extraction`));
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.buildImagesFromPageOrHandle = buildImagesFromPageOrHandle;
|
|
7
|
+
exports.captureFullPageImagesWithOverlap = captureFullPageImagesWithOverlap;
|
|
8
|
+
var _neverthrow = require("neverthrow");
|
|
9
|
+
var errors = _interopRequireWildcard(require("../types/errors"));
|
|
10
|
+
// Generated interop helper used for namespace-style imports (`import * as X`).
// - Returns `e` unchanged when `t` is falsy and `e.__esModule` is set.
// - Otherwise builds a null-prototype namespace object whose `default` is `e` and
//   copies every own property except "default", re-defining accessors via their descriptors.
// - Results are memoised per input in one of two WeakMaps (selected by `t`) when WeakMap exists.
// - The function reassigns itself on first call so the WeakMaps are created only once.
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
11
|
+
/**
 * Captures a page as a series of vertically overlapping viewport screenshots.
 *
 * The viewport is set to 1200 x `sliceHeight`, the page is scrolled slice by
 * slice, and one screenshot is taken per position after a 500 ms pause.
 * Capture stops once the whole `document.body.scrollHeight` is covered or after
 * 11 slices, whichever comes first. The viewport is NOT restored here; callers
 * that care must restore it themselves.
 *
 * @param {object} page - Object exposing `evaluate`, `setViewportSize`,
 *   `waitForTimeout` and `screenshot` (presumably a Playwright Page — confirm
 *   against callers).
 * @param {object} [options]
 * @param {number} [options.overlap=200] - Pixels shared between consecutive slices.
 * @param {number} [options.sliceHeight=1000] - Viewport height used for each slice.
 * @returns {Promise<Array>} The values resolved by `page.screenshot()`, in
 *   top-to-bottom order.
 */
async function captureFullPageImagesWithOverlap(page, options = {}) {
  // Destructure with per-field defaults so a partial options object (e.g. only
  // `sliceHeight`) no longer yields an undefined overlap and a NaN scroll step.
  const { overlap = 200, sliceHeight = 1000 } = options ?? {};
  const totalHeight = await page.evaluate(() => document.body.scrollHeight);
  let currentHeight = 0;
  const buffers = [];
  while (currentHeight < totalHeight) {
    if (buffers.length > 10) {
      // Report what was actually captured, not the full page height.
      console.info(`the page is too long, only first ${currentHeight} px of the page will be captured.`);
      break;
    }
    await page.setViewportSize({
      width: 1200,
      height: sliceHeight
    });
    // Every slice after the first starts `overlap` px above the running offset.
    await page.evaluate(y => window.scrollTo(0, y), currentHeight - (currentHeight > 0 ? overlap : 0));
    // Fixed pause after scrolling before the screenshot is taken.
    await page.waitForTimeout(500);
    const buffer = await page.screenshot();
    buffers.push(buffer);
    currentHeight += sliceHeight - overlap;
  }
  return buffers;
}
|
|
35
|
+
/**
 * Produces the screenshot(s) used for image-based extraction.
 *
 * The viewport is temporarily set to 1200x800. When `searchRegionHandler` is
 * given, a single PNG screenshot of that element is returned; otherwise the
 * whole page is captured with `captureFullPageImagesWithOverlap`. The viewport
 * size read at entry is restored on every exit path (success, error result, or
 * thrown exception) when one was reported by `page.viewportSize()`.
 *
 * @param {object} page - Object exposing `viewportSize` and `setViewportSize`
 *   (presumably a Playwright Page — confirm against callers).
 * @param {object} [searchRegionHandler] - Optional element handle exposing
 *   `boundingBox()` and `screenshot()`.
 * @returns {Promise<object>} neverthrow `ok([...images])`, or `err(errors.other(...))`
 *   when the region has no bounding box.
 */
async function buildImagesFromPageOrHandle(page, searchRegionHandler) {
  const originalViewPortSize = page.viewportSize();
  await page.setViewportSize({
    width: 1200,
    height: 800
  });
  try {
    if (searchRegionHandler) {
      const size = await searchRegionHandler.boundingBox();
      if (!size) {
        // NOTE(review): this branch fires when boundingBox() resolves to a falsy
        // value; no 5000px height check exists in this function, so the message
        // text may not describe the real cause — confirm before rewording.
        return (0, _neverthrow.err)(errors.other("the provided search region is very large, image extraction support up to 5000px height."));
      }
      return (0, _neverthrow.ok)([await searchRegionHandler.screenshot({
        type: "png"
      })]);
    }
    const fullPageImages = await captureFullPageImagesWithOverlap(page);
    return (0, _neverthrow.ok)(fullPageImages);
  } finally {
    // Previously only the full-page path restored the viewport; the region
    // paths returned early and left the page resized.
    if (originalViewPortSize) {
      await page.setViewportSize(originalViewPortSize);
    }
  }
}
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.createReaskMessage = createReaskMessage;
|
|
7
|
+
exports.recursivelyReplaceStrings = recursivelyReplaceStrings;
|
|
8
|
+
exports.validateJSONSchema = validateJSONSchema;
|
|
9
|
+
exports.validateToolCallSchema = validateToolCallSchema;
|
|
10
|
+
var _neverthrow = require("neverthrow");
|
|
11
|
+
var errors = _interopRequireWildcard(require("../types/errors"));
|
|
12
|
+
var _ajv = _interopRequireDefault(require("ajv"));
|
|
13
|
+
var _ajvFormats = _interopRequireDefault(require("ajv-formats"));
|
|
14
|
+
/*
 * Generated module-interop helper: returns `e` unchanged when it is truthy and
 * has a truthy `__esModule` flag; otherwise wraps it as `{ default: e }` so
 * callers can always read `.default`.
 */
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
15
|
+
/*
 * Generated module-interop helper (Babel-style output): builds a namespace-like
 * wrapper object for a `require`d module.
 *
 * - On the first call it creates two WeakMap caches (when WeakMap exists) and
 *   reassigns `_interopRequireWildcard` to the inner function, so the caches
 *   are created once and reused by later calls.
 * - If `t` is falsy and `e` is truthy with a truthy `__esModule`, `e` is
 *   returned unchanged.
 * - Otherwise a prototype-less object `{ default: e }` is created. For `null`
 *   or non-object/non-function `e` that object is returned as is.
 * - For objects/functions the wrapper is cached per `e` (cache chosen by `t`),
 *   then every own enumerable property except "default" is copied onto it;
 *   properties defined through a getter/setter are copied via their descriptor
 *   rather than by value.
 */
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
16
|
+
/**
 * Performs a structural sanity check of a (restricted) JSON schema.
 *
 * Accepted `type` values are "string", "number", "integer", "boolean", "array"
 * and "object". Array schemas need `items`, object schemas need a plain
 * `properties` object; both are validated recursively. Numeric and string
 * bound pairs are checked for contradictions.
 *
 * @param {*} schema - Candidate schema.
 * @returns {object} neverthrow `ok(schema)` when the schema passes, otherwise
 *   `err(errors.invalidJsonSchema(message))` describing the first problem found.
 */
function validateJSONSchema(schema) {
  const fail = message => (0, _neverthrow.err)(errors.invalidJsonSchema(message));

  // Arrays are `typeof "object"` too, so reject them explicitly.
  if (!schema || typeof schema !== "object" || Array.isArray(schema)) {
    return fail("Schema must be an object");
  }
  if (!schema.type) {
    return fail("Schema must have a 'type' property");
  }
  const validTypes = ["string", "number", "integer", "boolean", "array", "object"];
  if (!validTypes.includes(schema.type)) {
    return fail(`Invalid schema type: ${schema.type}`);
  }

  if (schema.type === "array") {
    if (!schema.items) {
      return fail("Array schema must have 'items' property");
    }
    const itemsValidation = validateJSONSchema(schema.items);
    if (itemsValidation.isErr()) {
      return itemsValidation;
    }
  }

  if (schema.type === "object") {
    // An array would pass a bare `typeof === "object"` check and be walked as
    // if its indices were property names.
    if (!schema.properties || typeof schema.properties !== "object" || Array.isArray(schema.properties)) {
      return fail("Object schema must have 'properties' object");
    }
    for (const [key, propSchema] of Object.entries(schema.properties)) {
      const propValidation = validateJSONSchema(propSchema);
      if (propValidation.isErr()) {
        return fail(`Invalid schema for property '${key}': ${propValidation.error.context}`);
      }
    }
    // Falsy `required` values are still ignored, as before; a present value
    // must really be a list of property names.
    if (schema.required && !(Array.isArray(schema.required) && schema.required.every(name => typeof name === "string"))) {
      return fail("'required' must be an array of property names");
    }
  }

  if (schema.type === "number" || schema.type === "integer") {
    if (schema.maximum !== undefined && schema.exclusiveMaximum !== undefined) {
      return fail("Cannot have both 'maximum' and 'exclusiveMaximum'");
    }
    if (schema.minimum !== undefined && schema.exclusiveMinimum !== undefined) {
      return fail("Cannot have both 'minimum' and 'exclusiveMinimum'");
    }
    // Comparison is false when either bound is undefined, so no extra guard.
    if (schema.minimum > schema.maximum) {
      return fail("'minimum' cannot be greater than 'maximum'");
    }
  }

  if (schema.type === "string") {
    if (schema.maxLength !== undefined && schema.minLength !== undefined && schema.minLength > schema.maxLength) {
      return fail("'minLength' cannot be greater than 'maxLength'");
    }
  }

  return (0, _neverthrow.ok)(schema);
}
|
|
70
|
+
/**
 * Returns a copy of `dataStructure` in which every string or number leaf that
 * has an entry in `replacements` is swapped for that entry.
 *
 * - Strings are looked up directly; numbers are looked up by their string form.
 * - A replacement of `null`/`undefined` means "no replacement": the original
 *   leaf is kept.
 * - Arrays and plain objects are rebuilt recursively; any other value
 *   (booleans, null, undefined, ...) is returned unchanged.
 *
 * Only OWN keys of `replacements` are honoured, so data such as "toString" or
 * "constructor" is not replaced by an inherited Object.prototype member.
 *
 * @param {*} dataStructure - Value to transform.
 * @param {Object<string, *>} replacements - Map from leaf text to replacement.
 * @returns {*} The transformed copy.
 */
function recursivelyReplaceStrings(dataStructure, replacements) {
  const table = replacements ?? {};
  const lookup = key => Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;

  if (typeof dataStructure === "string") {
    return lookup(dataStructure) ?? dataStructure;
  }
  if (typeof dataStructure === "number") {
    return lookup(dataStructure.toString()) ?? dataStructure;
  }
  if (Array.isArray(dataStructure)) {
    return dataStructure.map(item => recursivelyReplaceStrings(item, table));
  }
  if (dataStructure !== null && typeof dataStructure === "object") {
    // Object.fromEntries defines own data properties, so an own "__proto__" key
    // (possible after JSON.parse) is kept as data instead of hitting the
    // prototype setter.
    return Object.fromEntries(Object.entries(dataStructure).map(([key, value]) => [key, recursivelyReplaceStrings(value, table)]));
  }
  return dataStructure;
}
|
|
88
|
+
/**
 * Validates `instance` against `schema` with Ajv (all errors collected, verbose
 * mode, ajv-formats registered) and converts Ajv's errors into a flat list.
 *
 * @param {*} instance - Data to validate.
 * @param {object} schema - JSON schema handed to `ajv.compile`.
 * @returns {Array<{path: string, message: string, value: *, schema_path: string}>}
 *   Empty when the instance is valid. Otherwise one entry per Ajv error:
 *   `path` is a dotted/bracketed location such as `root.items[0].name`,
 *   `value` is the data found at that location, and `schema_path` is the
 *   dotted location of the failing keyword such as `schema.properties.a.type`.
 */
function validateToolCallSchema(instance, schema) {
  const ajv = new _ajv.default({
    allErrors: true,
    verbose: true
  });
  (0, _ajvFormats.default)(ajv);
  const validate = ajv.compile(schema);
  if (validate(instance)) {
    return [];
  }

  // JSON-pointer tokens escape "/" as "~1" and "~" as "~0".
  const decodeToken = token => token.replace(/~1/g, "/").replace(/~0/g, "~");

  // Named `issues` so the module-level `errors` import is not shadowed.
  const issues = [];
  for (const error of validate.errors ?? []) {
    // instancePath looks like "/items/0/name"; drop the leading "/".
    const instanceParts = error.instancePath ? error.instancePath.slice(1).split("/").map(decodeToken) : [];

    let pathString = "root";
    if (instanceParts.length > 0) {
      const rendered = instanceParts.map(part => /^\d+$/.test(part) ? `[${part}]` : part);
      // Join with dots, then glue array indices onto the preceding segment.
      pathString = ("root." + rendered.join(".")).replace(/\.\[/g, "[");
    }

    let schemaPathString = "schema";
    if (error.schemaPath) {
      // schemaPath looks like "#/properties/a/type"; strip the "#/" prefix so
      // the result does not start with "schema..".
      const trimmed = error.schemaPath.replace(/^#?\/?/, "");
      if (trimmed !== "") {
        schemaPathString = "schema." + trimmed.split("/").map(decodeToken).join(".");
      }
    }

    // Walk down to the offending value; stop as soon as there is nothing left
    // to descend into. (The previous guard was inverted and never descended.)
    let invalidValue = instance;
    for (const part of instanceParts) {
      if (invalidValue === null || invalidValue === undefined) {
        break;
      }
      invalidValue = invalidValue[part];
    }

    issues.push({
      path: pathString,
      message: error.message || "Validation error",
      value: invalidValue,
      schema_path: schemaPathString
    });
  }
  return issues;
}
|
|
141
|
+
/**
 * Builds the follow-up prompt that lists validation errors and asks for the
 * extraction to be repeated.
 *
 * @param {Array<{path: string, message: string}>} validationErrors - Errors to
 *   list; each is rendered as a numbered line (1-based).
 * @returns {string} Header line, blank line, the numbered errors, blank line,
 *   and the closing instruction.
 */
function createReaskMessage(validationErrors) {
  const numberedLines = validationErrors.map(function (issue, position) {
    const ordinal = position + 1;
    return `${ordinal}. Path "${issue.path}": ${issue.message}`;
  });
  const sections = [
    "The extracted data has the following validation errors that need to be fixed:",
    "",
    numberedLines.join("\n"),
    "",
    "Please extract the data again, ensuring it follows the exact schema requirements."
  ];
  return sections.join("\n");
}
|