@intuned/browser-dev 2.2.3-test-build.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc +21 -0
- package/.eslintignore +10 -0
- package/.eslintrc.js +39 -0
- package/LICENSE +43 -0
- package/dist/ai/export.d.js +5 -0
- package/dist/ai/export.d.ts +641 -0
- package/dist/ai/extractStructuredData.js +320 -0
- package/dist/ai/extractStructuredDataUsingAi.js +139 -0
- package/dist/ai/extractionHelpers/screenshotHelpers.js +56 -0
- package/dist/ai/extractionHelpers/validateSchema.js +148 -0
- package/dist/ai/index.d.ts +641 -0
- package/dist/ai/index.js +19 -0
- package/dist/ai/isPageLoaded.js +77 -0
- package/dist/ai/prompt.js +39 -0
- package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +137 -0
- package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
- package/dist/ai/tests/testExtractStructuredData.spec.js +646 -0
- package/dist/ai/tests/testIsPageLoaded.spec.js +277 -0
- package/dist/ai/tools/index.js +48 -0
- package/dist/ai/types/errors.js +67 -0
- package/dist/ai/types/models.js +45 -0
- package/dist/ai/types/types.js +48 -0
- package/dist/ai/validators.js +167 -0
- package/dist/common/Logger/index.js +60 -0
- package/dist/common/Logger/types.js +5 -0
- package/dist/common/SdkError.js +50 -0
- package/dist/common/aiModelsValidations.js +32 -0
- package/dist/common/browser_scripts.js +2596 -0
- package/dist/common/ensureBrowserScripts.js +18 -0
- package/dist/common/extendedTest.js +148 -0
- package/dist/common/extractionHelpers.js +19 -0
- package/dist/common/formatZodError.js +18 -0
- package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
- package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
- package/dist/common/fuzzySearch/utils.js +23 -0
- package/dist/common/getModelProvider.js +18 -0
- package/dist/common/getSimplifiedHtml.js +122 -0
- package/dist/common/hashObject.js +32 -0
- package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
- package/dist/common/html2markdown/index.js +19 -0
- package/dist/common/jwtTokenManager.js +18 -0
- package/dist/common/loadRuntime.js +16 -0
- package/dist/common/locatorHelpers.js +41 -0
- package/dist/common/matching/collectStrings.js +32 -0
- package/dist/common/matching/levenshtein.js +40 -0
- package/dist/common/matching/matching.js +317 -0
- package/dist/common/matching/types.js +1 -0
- package/dist/common/noEmpty.js +9 -0
- package/dist/common/saveSnapshotWithExamples.js +60 -0
- package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
- package/dist/common/xpathMapping.js +107 -0
- package/dist/helpers/clickUntilExhausted.js +85 -0
- package/dist/helpers/downloadFile.js +125 -0
- package/dist/helpers/export.d.js +5 -0
- package/dist/helpers/export.d.ts +1220 -0
- package/dist/helpers/extractMarkdown.js +35 -0
- package/dist/helpers/filterEmptyValues.js +54 -0
- package/dist/helpers/gotoUrl.js +98 -0
- package/dist/helpers/index.d.ts +1220 -0
- package/dist/helpers/index.js +128 -0
- package/dist/helpers/processDate.js +25 -0
- package/dist/helpers/resolveUrl.js +64 -0
- package/dist/helpers/sanitizeHtml.js +74 -0
- package/dist/helpers/saveFileToS3.js +50 -0
- package/dist/helpers/scrollToLoadContent.js +57 -0
- package/dist/helpers/tests/extendedTest.js +130 -0
- package/dist/helpers/tests/testClickUntilExhausted.spec.js +387 -0
- package/dist/helpers/tests/testDownloadFile.spec.js +204 -0
- package/dist/helpers/tests/testExtractMarkdown.spec.js +290 -0
- package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
- package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
- package/dist/helpers/tests/testProcessDate.spec.js +13 -0
- package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
- package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
- package/dist/helpers/tests/testScrollToLoadContent.spec.js +163 -0
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +342 -0
- package/dist/helpers/tests/testWithDomSettledWait.spec.js +164 -0
- package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +114 -0
- package/dist/helpers/types/Attachment.js +115 -0
- package/dist/helpers/types/CustomTypeRegistry.js +48 -0
- package/dist/helpers/types/RunEnvironment.js +18 -0
- package/dist/helpers/types/ValidationError.js +17 -0
- package/dist/helpers/types/index.js +51 -0
- package/dist/helpers/uploadFileToS3.js +154 -0
- package/dist/helpers/utils/getS3Client.js +22 -0
- package/dist/helpers/utils/index.js +73 -0
- package/dist/helpers/utils/isDownload.js +10 -0
- package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
- package/dist/helpers/utils/isLocator.js +9 -0
- package/dist/helpers/utils/jwtTokenManager.js +18 -0
- package/dist/helpers/validateDataUsingSchema.js +103 -0
- package/dist/helpers/waitForDomSettled.js +90 -0
- package/dist/helpers/withNetworkSettledWait.js +91 -0
- package/dist/index.d.js +16 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +16 -0
- package/dist/intunedServices/ApiGateway/aiApiGateway.js +99 -0
- package/dist/intunedServices/ApiGateway/factory.js +13 -0
- package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
- package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
- package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
- package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +224 -0
- package/dist/intunedServices/ApiGateway/types.js +11 -0
- package/dist/intunedServices/cache/cache.js +61 -0
- package/dist/intunedServices/cache/index.js +12 -0
- package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
- package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
- package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +135 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +132 -0
- package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
- package/dist/optimized-extractors/common/findTableHeaders.js +162 -0
- package/dist/optimized-extractors/common/index.js +55 -0
- package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +84 -0
- package/dist/optimized-extractors/common/matching/matching.js +212 -0
- package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
- package/dist/optimized-extractors/common/matching/types.js +18 -0
- package/dist/optimized-extractors/common/matching/utils.js +184 -0
- package/dist/optimized-extractors/common/utils.js +58 -0
- package/dist/optimized-extractors/export.d.js +5 -0
- package/dist/optimized-extractors/export.d.ts +397 -0
- package/dist/optimized-extractors/extractArray.js +120 -0
- package/dist/optimized-extractors/extractObject.js +104 -0
- package/dist/optimized-extractors/index.d.ts +397 -0
- package/dist/optimized-extractors/index.js +31 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +312 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
- package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +152 -0
- package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
- package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
- package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +240 -0
- package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
- package/dist/optimized-extractors/models/anthropicModel.js +23 -0
- package/dist/optimized-extractors/models/openaiModel.js +23 -0
- package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
- package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
- package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
- package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
- package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
- package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
- package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
- package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
- package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
- package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
- package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
- package/dist/optimized-extractors/types/errors.js +42 -0
- package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
- package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
- package/dist/optimized-extractors/types/types.js +5 -0
- package/dist/optimized-extractors/validators.js +152 -0
- package/dist/vite-env.d.js +1 -0
- package/dist/vite-env.d.ts +9 -0
- package/docs.md +14 -0
- package/generated-docs/ai/functions/extractStructuredData.mdx +255 -0
- package/generated-docs/ai/functions/isPageLoaded.mdx +88 -0
- package/generated-docs/ai/interfaces/ArraySchema.mdx +36 -0
- package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
- package/generated-docs/ai/interfaces/BooleanSchema.mdx +28 -0
- package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
- package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
- package/generated-docs/ai/interfaces/NumberSchema.mdx +35 -0
- package/generated-docs/ai/interfaces/ObjectSchema.mdx +39 -0
- package/generated-docs/ai/interfaces/StringSchema.mdx +35 -0
- package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
- package/generated-docs/ai/type-aliases/ContentItem.mdx +12 -0
- package/generated-docs/ai/type-aliases/JsonSchema.mdx +47 -0
- package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +85 -0
- package/generated-docs/helpers/functions/downloadFile.mdx +99 -0
- package/generated-docs/helpers/functions/extractMarkdown.mdx +56 -0
- package/generated-docs/helpers/functions/filterEmptyValues.mdx +51 -0
- package/generated-docs/helpers/functions/goToUrl.mdx +124 -0
- package/generated-docs/helpers/functions/processDate.mdx +55 -0
- package/generated-docs/helpers/functions/resolveUrl.mdx +165 -0
- package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
- package/generated-docs/helpers/functions/saveFileToS3.mdx +127 -0
- package/generated-docs/helpers/functions/scrollToLoadContent.mdx +89 -0
- package/generated-docs/helpers/functions/uploadFileToS3.mdx +121 -0
- package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +90 -0
- package/generated-docs/helpers/functions/waitForDomSettled.mdx +91 -0
- package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +76 -0
- package/generated-docs/helpers/interfaces/Attachment.mdx +56 -0
- package/generated-docs/helpers/interfaces/S3Configs.mdx +52 -0
- package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
- package/generated-docs/helpers/type-aliases/AttachmentType.mdx +10 -0
- package/generated-docs/helpers/type-aliases/FileType.mdx +61 -0
- package/generated-docs/helpers/type-aliases/Trigger.mdx +62 -0
- package/how-to-run-tests.md +10 -0
- package/intuned-runtime-setup.md +13 -0
- package/package.json +119 -0
- package/tsconfig.eslint.json +5 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.extractArrayFromPage = exports.extractArrayFromLocator = void 0;
|
|
7
|
+
var _validators = require("./validators");
|
|
8
|
+
var _formatZodError = require("../common/formatZodError");
|
|
9
|
+
var _dynamicListExtractor = require("./listExtractionHelpers/dynamicListExtractor");
|
|
10
|
+
var _SdkError = require("../common/SdkError");
|
|
11
|
+
var _Logger = require("../common/Logger");
|
|
12
|
+
/**
 * Extracts an array of entities from a page by delegating to `dynamicListExtractor`.
 *
 * The raw `options` are validated with `extractArrayOptimizedInputSchema`; the
 * validated values are what get forwarded to the extractor. A failed extractor
 * result is logged and re-thrown as an `SdkError` whose message is the error type.
 *
 * @param page - Page to extract from; must be truthy.
 * @param options - Unvalidated extraction options supplied by the caller.
 * @returns The extractor's value, or an empty array when the extractor reports
 *   `InsufficientAiCredits`.
 * @throws SdkError when `page` is missing, when `options` fail validation, or
 *   when the extractor reports any error other than `InsufficientAiCredits`.
 */
const extractArrayFromPage = async (page, options) => {
  // Captured first so every SdkError thrown below carries the same user stack.
  const originalPositionStack = (0, _SdkError.captureUserStack)();
  if (!page) {
    throw new _SdkError.SdkError("Invalid page object, page must be an instance of playwright page", originalPositionStack);
  }

  const optionsValidationResults = _validators.extractArrayOptimizedInputSchema.safeParse(options);
  if (!optionsValidationResults.success) {
    const errors = (0, _formatZodError.formatZodError)(optionsValidationResults.error);
    const message = `invalid extractArrayFromPage input: ${errors.join("\n")}`;
    throw new _SdkError.SdkError(message, originalPositionStack);
  }

  const validOptions = optionsValidationResults.data;
  // Resolve the name once so log lines show the name actually used ("data" when
  // omitted) instead of interpolating "undefined" from the raw options.
  const itemEntityName = validOptions.itemEntityName ?? "data";
  const result = await (0, _dynamicListExtractor.dynamicListExtractor)(page, validOptions.label, {
    itemEntityName,
    itemEntitySchema: validOptions.itemEntitySchema,
    strategy: validOptions.strategy,
    optionalPropertiesInvalidator: validOptions.optionalPropertiesInvalidator,
    variantKey: validOptions.variantKey,
    prompt: validOptions.prompt,
    apiKey: validOptions.apiKey
  });

  if (result.isErr()) {
    const logPrefix = `Optimized array extractor ${itemEntityName}`;
    switch (result.error.type) {
      case "InvalidSearchRegion":
        _Logger.logger.error(`${logPrefix} - Search region is invalid. Please make sure container is correct. Object extraction relies on the container when passed.`);
        throw new _SdkError.SdkError("InvalidSearchRegion", originalPositionStack);
      case "InvalidInput":
        _Logger.logger.error(`${logPrefix} - Invalid input: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidInput", originalPositionStack);
      case "InvalidAddressUrl":
        _Logger.logger.error(`${logPrefix} - Invalid address url: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidAddressUrl", originalPositionStack);
      case "InvalidExtractionResult":
        _Logger.logger.error(`${logPrefix} - Invalid extraction result: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidExtractionResult", originalPositionStack);
      case "InvalidList":
        _Logger.logger.error(`${logPrefix} - Invalid list: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidList", originalPositionStack);
      case "NoResultsFound":
        _Logger.logger.error(`${logPrefix} - Invalid results: No results were found. The page is probably empty.`);
        throw new _SdkError.SdkError("NoResultsFound", originalPositionStack);
      case "RequiredPropertyNotExtracted":
        _Logger.logger.error(`${logPrefix} - Required property not extracted: ${result.error.context}`);
        throw new _SdkError.SdkError("RequiredPropertyNotExtracted", originalPositionStack);
      case "Other":
        // The raw error is printed as-is because the interpolated log line
        // below only carries its string form.
        console.log(result.error.error ?? "");
        _Logger.logger.error(`${logPrefix} - Other error: ${result.error.error ?? result.error.context}`);
        throw new _SdkError.SdkError("Other", originalPositionStack);
      case "InsufficientAiCredits":
        // NOTE(review): this variant returns an empty array here, whereas
        // extractArrayFromLocator throws for the same error type — confirm
        // which behaviour is intended before unifying them.
        if (result.error.context) {
          _Logger.logger.error(result.error.context);
        }
        return [];
      default:
        // Previously an unrecognised error type fell out of the switch and the
        // function returned `result.value` of a failed result.
        _Logger.logger.error(`${logPrefix} - Unhandled error type: ${result.error.type}`);
        throw new _SdkError.SdkError(`Unhandled extractor error type: ${result.error.type}`, originalPositionStack);
    }
  }

  return result.value;
};
|
|
67
|
+
exports.extractArrayFromPage = extractArrayFromPage;
|
|
68
|
+
/**
 * Extracts an array of entities from the region described by `locator`, by
 * delegating to `dynamicListExtractor` with the locator as `searchRegion`.
 *
 * The raw `options` are validated with `extractArrayOptimizedInputSchema`; the
 * validated values (including `prompt` and `apiKey`) are forwarded to the
 * extractor. A failed extractor result is logged and re-thrown as an `SdkError`
 * whose message is the error type.
 *
 * @param locator - Locator whose page is used and which bounds the search region.
 * @param options - Unvalidated extraction options supplied by the caller.
 * @returns The extractor's value on success.
 * @throws SdkError when `locator` is missing, when `options` fail validation, or
 *   when the extractor reports an error (including `InsufficientAiCredits`).
 */
const extractArrayFromLocator = async (locator, options) => {
  // Captured first so every SdkError thrown below carries the same user stack.
  const originalPositionStack = (0, _SdkError.captureUserStack)();
  if (!locator) {
    // Without this guard a missing locator surfaced as a TypeError from `locator.page()`.
    throw new _SdkError.SdkError("Invalid locator object, locator must be an instance of playwright locator", originalPositionStack);
  }

  const optionsValidationResults = _validators.extractArrayOptimizedInputSchema.safeParse(options);
  if (!optionsValidationResults.success) {
    const errors = (0, _formatZodError.formatZodError)(optionsValidationResults.error);
    const message = `invalid extractArrayFromLocator input: ${errors.join("\n")}`;
    throw new _SdkError.SdkError(message, originalPositionStack);
  }

  const validOptions = optionsValidationResults.data;
  // Same default as extractArrayFromPage; also keeps "undefined" out of log lines.
  const itemEntityName = validOptions.itemEntityName ?? "data";
  const page = locator.page();
  const result = await (0, _dynamicListExtractor.dynamicListExtractor)(page, validOptions.label, {
    itemEntityName,
    itemEntitySchema: validOptions.itemEntitySchema,
    strategy: validOptions.strategy,
    optionalPropertiesInvalidator: validOptions.optionalPropertiesInvalidator,
    variantKey: validOptions.variantKey,
    searchRegion: locator,
    // Previously dropped: caller-supplied prompt/apiKey were validated but never forwarded.
    prompt: validOptions.prompt,
    apiKey: validOptions.apiKey
  });

  if (result.isErr()) {
    const logPrefix = `Optimized array extractor ${itemEntityName}`;
    switch (result.error.type) {
      case "InvalidSearchRegion":
        _Logger.logger.error(`${logPrefix} - Search region is invalid. Please make sure container is correct. Object extraction relies on the container when passed.`);
        // SdkError (not a bare Error) so the captured user stack is attached,
        // matching every other case in this switch.
        throw new _SdkError.SdkError("InvalidSearchRegion", originalPositionStack);
      case "InvalidInput":
        _Logger.logger.error(`${logPrefix} - Invalid input: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidInput", originalPositionStack);
      case "InvalidAddressUrl":
        _Logger.logger.error(`${logPrefix} - Invalid address url: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidAddressUrl", originalPositionStack);
      case "InvalidExtractionResult":
        _Logger.logger.error(`${logPrefix} - Invalid extraction result: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidExtractionResult", originalPositionStack);
      case "InvalidList":
        _Logger.logger.error(`${logPrefix} - Invalid list: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidList", originalPositionStack);
      case "NoResultsFound":
        _Logger.logger.error(`${logPrefix} - Invalid results: No results were found. The page is probably empty.`);
        throw new _SdkError.SdkError("NoResultsFound", originalPositionStack);
      case "RequiredPropertyNotExtracted":
        _Logger.logger.error(`${logPrefix} - Required property not extracted: ${result.error.context}`);
        throw new _SdkError.SdkError("RequiredPropertyNotExtracted", originalPositionStack);
      case "Other":
        // The raw error is printed as-is because the interpolated log line
        // below only carries its string form.
        console.log(result.error.error ?? "");
        _Logger.logger.error(`${logPrefix} - Other error: ${result.error.error ?? result.error.context}`);
        throw new _SdkError.SdkError("Other", originalPositionStack);
      case "InsufficientAiCredits":
        // Logged through the shared logger; the captured stack is no longer
        // dumped to stdout since it already travels with the SdkError.
        if (result.error.context) {
          _Logger.logger.error(result.error.context);
        }
        throw new _SdkError.SdkError("InsufficientAiCredits", originalPositionStack);
      default:
        // Previously an unrecognised error type fell out of the switch and the
        // function returned `result.value` of a failed result.
        _Logger.logger.error(`${logPrefix} - Unhandled error type: ${result.error.type}`);
        throw new _SdkError.SdkError(`Unhandled extractor error type: ${result.error.type}`, originalPositionStack);
    }
  }

  return result.value;
};
|
|
120
|
+
exports.extractArrayFromLocator = extractArrayFromLocator;
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.extractObjectFromPage = exports.extractObjectFromLocator = void 0;
|
|
7
|
+
var _validators = require("./validators");
|
|
8
|
+
var _formatZodError = require("../common/formatZodError");
|
|
9
|
+
var _dynamicObjectExtractor = require("./objectExtractionHelpers/dynamicObjectExtractor");
|
|
10
|
+
var _SdkError = require("../common/SdkError");
|
|
11
|
+
/**
 * Extracts a single entity from a page by delegating to `dynamicObjectExtractor`.
 *
 * The raw `options` are validated with `extractObjectOptimizedInputSchema`. A
 * failed extractor result is printed to the console and re-thrown as an
 * `SdkError`.
 *
 * @param page - Page to extract from; must be truthy.
 * @param options - Unvalidated extraction options supplied by the caller.
 * @returns The extractor's value on success.
 * @throws SdkError when `page` is missing, when `options` fail validation, or
 *   when the extractor reports an error. An `InvalidSearchRegion` error is
 *   surfaced with the message "InvalidContainer"; every other recognised type
 *   uses its own name as the message.
 */
const extractObjectFromPage = async (page, options) => {
  // Captured first so every SdkError thrown below carries the same user stack.
  const originalPositionStack = (0, _SdkError.captureUserStack)();
  if (!page) {
    throw new _SdkError.SdkError("Invalid page object, page must be an instance of playwright page", originalPositionStack);
  }

  const optionsValidationResults = _validators.extractObjectOptimizedInputSchema.safeParse(options);
  if (!optionsValidationResults.success) {
    const errors = (0, _formatZodError.formatZodError)(optionsValidationResults.error);
    const message = `invalid extractObjectFromPage input: ${errors.join("\n")}`;
    throw new _SdkError.SdkError(message, originalPositionStack);
  }

  const parsedOptions = optionsValidationResults.data;
  // Resolve the name once so log lines show the name actually used ("data" when
  // omitted) instead of interpolating "undefined" from the raw options.
  const entityName = parsedOptions.entityName ?? "data";
  const result = await (0, _dynamicObjectExtractor.dynamicObjectExtractor)(page, options.label, {
    entityName,
    entitySchema: parsedOptions.entitySchema,
    strategy: parsedOptions.strategy,
    optionalPropertiesInvalidator: parsedOptions.optionalPropertiesInvalidator,
    variantKey: parsedOptions.variantKey,
    prompt: parsedOptions.prompt,
    apiKey: parsedOptions.apiKey
  });

  if (result.isErr()) {
    const logPrefix = `Object Extractor ${entityName}`;
    switch (result.error.type) {
      case "InvalidSearchRegion":
        console.log(`${logPrefix} - Search region is invalid. Please make sure container is correct. Object extraction relies on the container when passed.`);
        // The thrown name differs from the error type; kept because callers may match on it.
        throw new _SdkError.SdkError("InvalidContainer", originalPositionStack);
      case "InvalidInput":
        console.log(`${logPrefix} - Invalid input: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidInput", originalPositionStack);
      case "InvalidExtractionResult":
        console.log(`${logPrefix} - Invalid extraction result: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidExtractionResult", originalPositionStack);
      case "InvalidPageState":
        console.log(`${logPrefix} - Invalid page state: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidPageState", originalPositionStack);
      case "Other":
        console.log(`${logPrefix} - Other error: ${result.error.context}`);
        throw new _SdkError.SdkError("Other", originalPositionStack);
      case "RequiredPropertyNotExtracted":
        console.log(`${logPrefix} - Required property not extracted: ${result.error.context}`);
        throw new _SdkError.SdkError("RequiredPropertyNotExtracted", originalPositionStack);
      case "InsufficientAiCredits":
        console.log(result.error.context);
        throw new _SdkError.SdkError("InsufficientAiCredits", originalPositionStack);
      default:
        // Previously an unrecognised error type fell out of the switch and the
        // function returned `result.value` of a failed result.
        console.log(`${logPrefix} - Unhandled error type: ${result.error.type}`);
        throw new _SdkError.SdkError(`Unhandled extractor error type: ${result.error.type}`, originalPositionStack);
    }
  }

  return result.value;
};
|
|
56
|
+
exports.extractObjectFromPage = extractObjectFromPage;
|
|
57
|
+
/**
 * Extracts a single entity from the region described by `locator`, by
 * delegating to `dynamicObjectExtractor` with the locator as `searchRegion`.
 *
 * The raw `options` are validated with `extractObjectOptimizedInputSchema`. A
 * failed extractor result is printed to the console and re-thrown as an
 * `SdkError`.
 *
 * @param locator - Locator whose page is used and which bounds the search region.
 * @param options - Unvalidated extraction options supplied by the caller.
 * @returns The extractor's value on success.
 * @throws SdkError when `locator` is missing, when `options` fail validation,
 *   or when the extractor reports an error. An `InvalidSearchRegion` error is
 *   surfaced with the message "InvalidContainer"; every other recognised type
 *   uses its own name as the message.
 */
const extractObjectFromLocator = async (locator, options) => {
  // Captured first so every SdkError thrown below carries the same user stack.
  const originalPositionStack = (0, _SdkError.captureUserStack)();
  if (!locator) {
    // Without this guard a missing locator surfaced as a TypeError from `locator.page()`.
    throw new _SdkError.SdkError("Invalid locator object, locator must be an instance of playwright locator", originalPositionStack);
  }

  const optionsValidationResults = _validators.extractObjectOptimizedInputSchema.safeParse(options);
  if (!optionsValidationResults.success) {
    const errors = (0, _formatZodError.formatZodError)(optionsValidationResults.error);
    const message = `invalid extractObjectFromLocator input: ${errors.join("\n")}`;
    throw new _SdkError.SdkError(message, originalPositionStack);
  }

  const data = optionsValidationResults.data;
  // Resolve the name once so log lines show the name actually used ("data" when
  // omitted) instead of interpolating "undefined" from the raw options.
  const entityName = data.entityName ?? "data";
  const page = locator.page();
  const result = await (0, _dynamicObjectExtractor.dynamicObjectExtractor)(page, options.label, {
    entityName,
    entitySchema: data.entitySchema,
    strategy: data.strategy,
    optionalPropertiesInvalidator: data.optionalPropertiesInvalidator,
    variantKey: data.variantKey,
    searchRegion: locator,
    prompt: data.prompt,
    apiKey: data.apiKey
  });

  if (result.isErr()) {
    const logPrefix = `Object Extractor ${entityName}`;
    switch (result.error.type) {
      case "InvalidSearchRegion":
        console.log(`${logPrefix} - Search region is invalid. Please make sure container is correct. Object extraction relies on the container when passed.`);
        // The thrown name differs from the error type; kept because callers may match on it.
        throw new _SdkError.SdkError("InvalidContainer", originalPositionStack);
      case "InvalidInput":
        console.log(`${logPrefix} - Invalid input: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidInput", originalPositionStack);
      case "InvalidExtractionResult":
        console.log(`${logPrefix} - Invalid extraction result: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidExtractionResult", originalPositionStack);
      case "InvalidPageState":
        console.log(`${logPrefix} - Invalid page state: ${result.error.context}`);
        throw new _SdkError.SdkError("InvalidPageState", originalPositionStack);
      case "Other":
        console.log(`${logPrefix} - Other error: ${result.error.context}`);
        throw new _SdkError.SdkError("Other", originalPositionStack);
      case "RequiredPropertyNotExtracted":
        console.log(`${logPrefix} - Required property not extracted: ${result.error.context}`);
        throw new _SdkError.SdkError("RequiredPropertyNotExtracted", originalPositionStack);
      case "InsufficientAiCredits":
        console.log(result.error.context);
        throw new _SdkError.SdkError("InsufficientAiCredits", originalPositionStack);
      default:
        // Previously an unrecognised error type fell out of the switch and the
        // function returned `result.value` of a failed result.
        console.log(`${logPrefix} - Unhandled error type: ${result.error.type}`);
        throw new _SdkError.SdkError(`Unhandled extractor error type: ${result.error.type}`, originalPositionStack);
    }
  }

  return result.value;
};
|
|
104
|
+
exports.extractObjectFromLocator = extractObjectFromLocator;
|
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
import { Locator, Page } from "playwright-core";
|
|
2
|
+
import { BasicSchema } from "./types/jsonSchema";
|
|
3
|
+
|
|
4
|
+
/**
 * Extraction strategy that works from a processed screenshot of the page or
 * locator rather than from its markup.
 *
 * Use it when the information to extract is not present in the DOM as text but
 * can be identified visually.
 *
 * @interface
 */
export interface ImageStrategy {
  /** The model to use in the extraction process. */
  model:
    | "claude-3-haiku"
    | "claude-3-haiku-20240307"
    | "claude-3.5-sonnet"
    | "claude-3-5-sonnet-20240620"
    | "claude-3-5-sonnet-20241022"
    | "claude-opus-4"
    | "claude-opus-4-20250514"
    | "claude-sonnet-4"
    | "claude-sonnet-4-20250514"
    | "gpt4-turbo"
    | "gpt-4-turbo-2024-04-09"
    | "gpt-4o"
    | "gpt-4o-2024-05-13"
    | "gpt-4o-mini"
    | "gpt-4o-mini-2024-07-18"
    | "gemini-1.5-pro"
    | "gemini-1.5-pro-002"
    | "gemini-1.5-flash-8b"
    | "gemini-1.5-flash-8b-002"
    | "gemini-1.5-flash"
    | "gemini-1.5-flash-002"
    | "gemini-2.0-flash-exp";
  /** The type of the strategy; always the literal `"IMAGE"`. */
  type: "IMAGE";
}
|
|
37
|
+
/**
 * Extraction strategy that works from the HTML of the page or locator.
 *
 * To reduce context, attributes are filtered before extraction. Only these are
 * kept: `aria-label`, `data-name`, `name`, `type`, `placeholder`, `value`,
 * `role`, `title`, `href`, `id` and `alt`.
 *
 * @interface
 */
export interface HtmlStrategy {
  /** The model to use in the extraction process. */
  model:
    | "claude-3-haiku"
    | "claude-3-haiku-20240307"
    | "claude-3-5-haiku"
    | "claude-3-5-haiku-20241022"
    | "claude-3.5-sonnet"
    | "claude-3-5-sonnet-20240620"
    | "claude-3-5-sonnet-20241022"
    | "claude-opus-4"
    | "claude-opus-4-20250514"
    | "claude-sonnet-4"
    | "claude-sonnet-4-20250514"
    | "gpt4-turbo"
    | "gpt-4-turbo-2024-04-09"
    | "gpt3.5-turbo"
    | "gpt-3.5-turbo-0125"
    | "gpt-4o"
    | "gpt-4o-2024-05-13"
    | "gpt-4o-mini"
    | "gpt-4o-mini-2024-07-18"
    | "gemini-1.5-pro"
    | "gemini-1.5-pro-002"
    | "gemini-1.5-flash-8b"
    | "gemini-1.5-flash-8b-002"
    | "gemini-1.5-flash"
    | "gemini-1.5-flash-002"
    | "gemini-2.0-flash-exp";
  /** The type of the strategy; always the literal `"HTML"`. */
  type: "HTML";
}
|
|
75
|
+
/**
|
|
76
|
+
* Extracts an array of structured data from a web page in an optimized way. This function uses AI the first n times, until it collects multiple examples,
|
|
77
|
+
* then it will build reliable selectors in the background to make the process more efficient
|
|
78
|
+
* @deprecated This function is deprecated and will be removed in the future.
|
|
79
|
+
* @param page - The Playwright Page object from which to extract the data.
|
|
80
|
+
* @param options.label - A label for this extraction process, used for billing and monitoring.
|
|
81
|
+
* @param options.itemEntityName - The name of the entity items being extracted. It must be between 1 and 50 characters long and can only contain letters, digits, periods, underscores, and hyphens.
|
|
82
|
+
* @param options.itemEntitySchema - The schema of the entity items being extracted.
|
|
83
|
+
* @param options.strategy - Optional. The strategy to use for extraction. If not provided, the HTML strategy with Claude Haiku will be used.
|
|
84
|
+
* @param options.prompt - Optional. A prompt to guide the extraction process.
|
|
85
|
+
* @param options.optionalPropertiesInvalidator - Optional. A function to invalidate optional properties.
|
|
86
|
+
* @param options.variantKey - Optional. A variant key for the extraction process, use this when the page has multiple variants/shapes.
|
|
87
|
+
* @param options.apiKey - Optional. An API key to use for the AI extraction. Extractions made with your API key will not be billed to your account.
|
|
88
|
+
* @returns A promise that resolves to a list of extracted data.
|
|
89
|
+
*
|
|
90
|
+
* @example
|
|
91
|
+
* ```typescript extractArrayFromPage
|
|
92
|
+
* import { extractArrayFromPage } from "@intuned/sdk/optimized-extractors";
|
|
93
|
+
*
|
|
94
|
+
* await page.goto("https://books.toscrape.com/")
|
|
95
|
+
* const books = await extractArrayFromPage(page,
|
|
96
|
+
* {
|
|
97
|
+
* strategy: {
|
|
98
|
+
* model: "gpt4-turbo",
|
|
99
|
+
* type: "HTML"
|
|
100
|
+
* },
|
|
101
|
+
* itemEntityName: "book",
|
|
102
|
+
* label: "books-extraction",
|
|
103
|
+
* itemEntitySchema: {
|
|
104
|
+
* type: "object",
|
|
105
|
+
* required: ["name"],
|
|
106
|
+
* properties: {
|
|
107
|
+
* name: {
|
|
108
|
+
* type: "string",
|
|
109
|
+
* description: "book name",
|
|
110
|
+
* primary: true
|
|
111
|
+
* }
|
|
112
|
+
* }
|
|
113
|
+
* }
|
|
114
|
+
* },
|
|
115
|
+
* )
|
|
116
|
+
*
|
|
117
|
+
* console.log(books)
|
|
118
|
+
*
|
|
119
|
+
* // output:
|
|
120
|
+
* // [
|
|
121
|
+
* // ...
|
|
122
|
+
* // { name: 'Olio' },
|
|
123
|
+
* // { name: 'Mesaerion: The Best Science Fiction Stories 1800-1849' },
|
|
124
|
+
* // { name: 'Libertarianism for Beginners' },
|
|
125
|
+
* // { name: "It's Only the Himalayas" }
|
|
126
|
+
* // ...
|
|
127
|
+
* // ]
|
|
128
|
+
*
|
|
129
|
+
* ```
|
|
130
|
+
*/
|
|
131
|
+
export declare function extractArrayFromPage(
|
|
132
|
+
page: Page,
|
|
133
|
+
options: {
|
|
134
|
+
label: string;
|
|
135
|
+
itemEntityName: string;
|
|
136
|
+
itemEntitySchema: SimpleArrayItemSchema;
|
|
137
|
+
strategy?: ImageStrategy | HtmlStrategy;
|
|
138
|
+
prompt?: string;
|
|
139
|
+
optionalPropertiesInvalidator?: (
|
|
140
|
+
result: Record<string, string>[]
|
|
141
|
+
) => string[];
|
|
142
|
+
variantKey?: string;
|
|
143
|
+
apiKey?: string;
|
|
144
|
+
}
|
|
145
|
+
): Promise<Record<string, string>[]>;
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Extracts an array of structured data from a locator.
|
|
149
|
+
* @deprecated This function is deprecated and will be removed in the future.
|
|
150
|
+
* @param locator - The Playwright Locator object from which to extract the data.
|
|
151
|
+
* @param options.label - A label for this extraction process, used for billing and monitoring.
|
|
152
|
+
* @param options.itemEntityName - The name of the entity items being extracted. it must be between 1 and 50 characters long and can only contain letters, digits, periods, underscores, and hyphens.
|
|
153
|
+
* @param options.itemEntitySchema - The schema of the entity items being extracted.
|
|
154
|
+
* @param options.strategy - Optional. The strategy to use for extraction. If not provided, the HTML strategy with Claude Haiku will be used.
|
|
155
|
+
* @param options.prompt - Optional. A prompt to guide the extraction process.
|
|
156
|
+
* @param options.optionalPropertiesInvalidator - Optional. A function to invalidate optional properties.
|
|
157
|
+
* @param options.variantKey - Optional. A variant key for the extraction process.
|
|
158
|
+
* @param options.apiKey - Optional. An API key to use for the AI extraction. Extractions made with your API key will not be billed to your account.
|
|
159
|
+
* @returns A promise that resolves to a list of extracted data.
|
|
160
|
+
*
|
|
161
|
+
* @example
|
|
162
|
+
* ```typescript extractArrayFromLocator
|
|
163
|
+
* import { extractArrayFromLocator } from "@intuned/sdk/optimized-extractors";
|
|
164
|
+
*
|
|
165
|
+
* await page.goto("https://books.toscrape.com/")
|
|
166
|
+
* const books = await extractArrayFromLocator(page.locator("section"),
|
|
167
|
+
* {
|
|
168
|
+
* itemEntityName: "book",
|
|
169
|
+
* label: "books-extraction",
|
|
170
|
+
* itemEntitySchema: {
|
|
171
|
+
* type: "object",
|
|
172
|
+
* required: ["name"],
|
|
173
|
+
* properties: {
|
|
174
|
+
* name: {
|
|
175
|
+
* type: "string",
|
|
176
|
+
* description: "book name",
|
|
177
|
+
* primary: true
|
|
178
|
+
* }
|
|
179
|
+
* }
|
|
180
|
+
* }
|
|
181
|
+
* },
|
|
182
|
+
* )
|
|
183
|
+
*
|
|
184
|
+
* console.log(books)
|
|
185
|
+
*
|
|
186
|
+
* // output:
|
|
187
|
+
* // [
|
|
188
|
+
* // ...
|
|
189
|
+
* // { name: 'Olio' },
|
|
190
|
+
* // { name: 'Mesaerion: The Best Science Fiction Stories 1800-1849' },
|
|
191
|
+
* // { name: 'Libertarianism for Beginners' },
|
|
192
|
+
* // { name: "It's Only the Himalayas" }
|
|
193
|
+
* // ...
|
|
194
|
+
* // ]
|
|
195
|
+
*
|
|
196
|
+
* ```
|
|
197
|
+
*/
|
|
198
|
+
export declare function extractArrayFromLocator(
|
|
199
|
+
locator: Locator,
|
|
200
|
+
options: {
|
|
201
|
+
label: string;
|
|
202
|
+
itemEntityName: string;
|
|
203
|
+
itemEntitySchema: SimpleArrayItemSchema;
|
|
204
|
+
strategy?: ImageStrategy | HtmlStrategy;
|
|
205
|
+
prompt?: string;
|
|
206
|
+
optionalPropertiesInvalidator?: (
|
|
207
|
+
result: Record<string, string>[]
|
|
208
|
+
) => string[];
|
|
209
|
+
variantKey?: string;
|
|
210
|
+
apiKey?: string;
|
|
211
|
+
}
|
|
212
|
+
): Promise<Record<string, string>[]>;
|
|
213
|
+
|
|
214
|
+
/**
|
|
215
|
+
* The schema of a single string property inside a simple object schema.
|
|
216
|
+
* @interface SimpleObjectStringSchema
|
|
217
|
+
* @extends BasicSchema
|
|
218
|
+
* @property type - The type of the schema, which is always "string".
|
|
219
|
+
*/
|
|
220
|
+
interface SimpleObjectStringSchema extends BasicSchema {
|
|
221
|
+
type: "string";
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
/**
|
|
225
|
+
* The schema of a single string property inside a simple array item schema.
|
|
226
|
+
* @interface SimpleArrayStringSchema
|
|
227
|
+
* @extends BasicSchema
|
|
228
|
+
* @property type - The type of the schema, which is always "string".
|
|
229
|
+
* @property [primary] - Optional. Indicates whether this is a primary property.
|
|
230
|
+
*/
|
|
231
|
+
interface SimpleArrayStringSchema extends BasicSchema {
|
|
232
|
+
type: "string";
|
|
233
|
+
primary?: boolean;
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
/**
|
|
237
|
+
* A simple object schema with properties.
|
|
238
|
+
* @interface SimpleObjectSchema
|
|
239
|
+
* @extends BasicSchema
|
|
240
|
+
* @property type - The type of the schema, which is always "object".
|
|
241
|
+
* @property properties - The properties of the object.
|
|
242
|
+
* @property required - The required properties of the object.
|
|
243
|
+
*/
|
|
244
|
+
export interface SimpleObjectSchema extends BasicSchema {
|
|
245
|
+
type: "object";
|
|
246
|
+
properties: Record<string, SimpleObjectStringSchema>;
|
|
247
|
+
required: string[];
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
/**
|
|
251
|
+
* A simple array item schema with properties.
|
|
252
|
+
* @interface SimpleArrayItemSchema
|
|
253
|
+
* @extends BasicSchema
|
|
254
|
+
* @property type - The type of the schema, which is always "object".
|
|
255
|
+
* @property properties - The properties of the array item.
|
|
256
|
+
* @property required - The required properties of the array item.
|
|
257
|
+
*/
|
|
258
|
+
export interface SimpleArrayItemSchema extends BasicSchema {
|
|
259
|
+
type: "object";
|
|
260
|
+
properties: Record<string, SimpleArrayStringSchema>;
|
|
261
|
+
required: string[];
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
/**
|
|
265
|
+
* Extracts a structured object from a web page.
|
|
266
|
+
* @deprecated This function is deprecated and will be removed in the future.
|
|
267
|
+
* @param page - The Playwright Page object from which to extract the data.
|
|
268
|
+
* @param options.label - A label for this extraction process, used for billing and monitoring.
|
|
269
|
+
* @param options.entityName - The name of the entity being extracted. it must be between 1 and 50 characters long and can only contain letters, digits, periods, underscores, and hyphens.
|
|
270
|
+
* @param options.entitySchema - The schema of the entity being extracted.
|
|
271
|
+
* @param options.strategy - Optional. The strategy to use for extraction. If not provided, the HTML strategy with Claude Haiku will be used.
|
|
272
|
+
* @param options.prompt - Optional. A prompt to guide the extraction process.
|
|
273
|
+
* @param options.optionalPropertiesInvalidator - Optional. A function to invalidate optional properties.
|
|
274
|
+
* @param options.variantKey - Optional. A variant key for the extraction process.
|
|
275
|
+
* @param options.apiKey - Optional. An API key to use for the AI extraction. Extractions made with your API key will not be billed to your account.
|
|
276
|
+
* @returns A promise that resolves to the extracted object.
|
|
277
|
+
* @example
|
|
278
|
+
* ```typescript extractObjectFromPage
|
|
279
|
+
* import { extractObjectFromPage } from "@intuned/sdk/optimized-extractors";
|
|
280
|
+
*
|
|
281
|
+
* await page.goto("https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html")
|
|
282
|
+
* const book = await extractObjectFromPage(page,
|
|
283
|
+
* {
|
|
284
|
+
* entityName: "book",
|
|
285
|
+
* label: "book-extraction",
|
|
286
|
+
* entitySchema: {
|
|
287
|
+
* type: "object",
|
|
288
|
+
* required: ["name","price","reviews"],
|
|
289
|
+
* properties: {
|
|
290
|
+
* name: {
|
|
291
|
+
* type: "string",
|
|
292
|
+
* description: "book name",
|
|
293
|
+
* },
|
|
294
|
+
* price: {
|
|
295
|
+
* type: "string",
|
|
296
|
+
* description: "book price"
|
|
297
|
+
* },
|
|
298
|
+
* reviews: {
|
|
299
|
+
* type: "string",
|
|
300
|
+
* description: "Number of reviews"
|
|
301
|
+
* }
|
|
302
|
+
*
|
|
303
|
+
* }
|
|
304
|
+
* }
|
|
305
|
+
* },
|
|
306
|
+
* )
|
|
307
|
+
*
|
|
308
|
+
* console.log(book)
|
|
309
|
+
*
|
|
310
|
+
* // output:
|
|
311
|
+
* // { name: 'A Light in the Attic', price: '£51.77', reviews: '0' }
|
|
312
|
+
*
|
|
313
|
+
* ```
|
|
314
|
+
*/
|
|
315
|
+
export declare function extractObjectFromPage(
|
|
316
|
+
page: Page,
|
|
317
|
+
options: {
|
|
318
|
+
label: string;
|
|
319
|
+
entityName: string;
|
|
320
|
+
entitySchema: SimpleObjectSchema;
|
|
321
|
+
strategy?: ImageStrategy | HtmlStrategy;
|
|
322
|
+
prompt?: string;
|
|
323
|
+
optionalPropertiesInvalidator?: (
|
|
324
|
+
result: Record<string, string | null> | null
|
|
325
|
+
) => string[];
|
|
326
|
+
variantKey?: string;
|
|
327
|
+
apiKey?: string;
|
|
328
|
+
}
|
|
329
|
+
): Promise<Record<string, string | null> | null>;
|
|
330
|
+
|
|
331
|
+
/**
|
|
332
|
+
* Extracts a structured object from a locator.
|
|
333
|
+
* @deprecated This function is deprecated and will be removed in the future.
|
|
334
|
+
* @param locator - The Playwright Locator object from which to extract the data.
|
|
335
|
+
* @param options.label - A label for this extraction process, used for billing and monitoring.
|
|
336
|
+
* @param options.entityName - The name of the entity being extracted. it must be between 1 and 50 characters long and can only contain letters, digits, periods, underscores, and hyphens.
|
|
337
|
+
* @param options.entitySchema - The schema of the entity being extracted.
|
|
338
|
+
* @param options.strategy - Optional. The strategy to use for extraction. If not provided, the HTML strategy with Claude Haiku will be used.
|
|
339
|
+
* @param options.prompt - Optional. A prompt to guide the extraction process.
|
|
340
|
+
* @param options.optionalPropertiesInvalidator - Optional. A function to invalidate optional properties.
|
|
341
|
+
* @param options.variantKey - Optional. A variant key for the extraction process.
|
|
342
|
+
* @param options.apiKey - Optional. An API key to use for the AI extraction. Extractions made with your API key will not be billed to your account.
|
|
343
|
+
* @returns A promise that resolves to the extracted object.
|
|
344
|
+
*
|
|
345
|
+
* @example
|
|
346
|
+
* ```typescript extractObjectFromLocator
|
|
347
|
+
* import { extractObjectFromLocator } from "@intuned/sdk/optimized-extractors";
|
|
348
|
+
*
|
|
349
|
+
* await page.goto("https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html")
|
|
350
|
+
* const book = await extractObjectFromLocator(page.locator(".page_inner"),
|
|
351
|
+
* {
|
|
352
|
+
* entityName: "book",
|
|
353
|
+
* label: "book-extraction",
|
|
354
|
+
* entitySchema: {
|
|
355
|
+
* type: "object",
|
|
356
|
+
* required: ["name","price","reviews"],
|
|
357
|
+
* properties: {
|
|
358
|
+
* name: {
|
|
359
|
+
* type: "string",
|
|
360
|
+
* description: "book name",
|
|
361
|
+
* },
|
|
362
|
+
* price: {
|
|
363
|
+
* type: "string",
|
|
364
|
+
* description: "book price"
|
|
365
|
+
* },
|
|
366
|
+
* reviews: {
|
|
367
|
+
* type: "string",
|
|
368
|
+
* description: "Number of reviews"
|
|
369
|
+
* }
|
|
370
|
+
*
|
|
371
|
+
* }
|
|
372
|
+
* }
|
|
373
|
+
* },
|
|
374
|
+
* )
|
|
375
|
+
*
|
|
376
|
+
* console.log(book)
|
|
377
|
+
*
|
|
378
|
+
* // output:
|
|
379
|
+
* // { name: 'A Light in the Attic', price: '£51.77', reviews: '0' }
|
|
380
|
+
*
|
|
381
|
+
* ```
|
|
382
|
+
*/
|
|
383
|
+
export declare function extractObjectFromLocator(
|
|
384
|
+
locator: Locator,
|
|
385
|
+
options: {
|
|
386
|
+
label: string;
|
|
387
|
+
entityName: string;
|
|
388
|
+
entitySchema: SimpleObjectSchema;
|
|
389
|
+
strategy?: ImageStrategy | HtmlStrategy;
|
|
390
|
+
prompt?: string;
|
|
391
|
+
optionalPropertiesInvalidator?: (
|
|
392
|
+
result: Record<string, string | null> | null
|
|
393
|
+
) => string[];
|
|
394
|
+
variantKey?: string;
|
|
395
|
+
apiKey?: string;
|
|
396
|
+
}
|
|
397
|
+
): Promise<Record<string, string | null> | null>;
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
Object.defineProperty(exports, "extractArrayFromLocator", {
|
|
7
|
+
enumerable: true,
|
|
8
|
+
get: function () {
|
|
9
|
+
return _extractArray.extractArrayFromLocator;
|
|
10
|
+
}
|
|
11
|
+
});
|
|
12
|
+
Object.defineProperty(exports, "extractArrayFromPage", {
|
|
13
|
+
enumerable: true,
|
|
14
|
+
get: function () {
|
|
15
|
+
return _extractArray.extractArrayFromPage;
|
|
16
|
+
}
|
|
17
|
+
});
|
|
18
|
+
Object.defineProperty(exports, "extractObjectFromLocator", {
|
|
19
|
+
enumerable: true,
|
|
20
|
+
get: function () {
|
|
21
|
+
return _extractObject.extractObjectFromLocator;
|
|
22
|
+
}
|
|
23
|
+
});
|
|
24
|
+
Object.defineProperty(exports, "extractObjectFromPage", {
|
|
25
|
+
enumerable: true,
|
|
26
|
+
get: function () {
|
|
27
|
+
return _extractObject.extractObjectFromPage;
|
|
28
|
+
}
|
|
29
|
+
});
|
|
30
|
+
var _extractArray = require("./extractArray");
|
|
31
|
+
var _extractObject = require("./extractObject");
|