@intuned/browser-dev 2.2.3-test-build.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc +21 -0
- package/.eslintignore +10 -0
- package/.eslintrc.js +39 -0
- package/LICENSE +43 -0
- package/dist/ai/export.d.js +5 -0
- package/dist/ai/export.d.ts +641 -0
- package/dist/ai/extractStructuredData.js +320 -0
- package/dist/ai/extractStructuredDataUsingAi.js +139 -0
- package/dist/ai/extractionHelpers/screenshotHelpers.js +56 -0
- package/dist/ai/extractionHelpers/validateSchema.js +148 -0
- package/dist/ai/index.d.ts +641 -0
- package/dist/ai/index.js +19 -0
- package/dist/ai/isPageLoaded.js +77 -0
- package/dist/ai/prompt.js +39 -0
- package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +137 -0
- package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
- package/dist/ai/tests/testExtractStructuredData.spec.js +646 -0
- package/dist/ai/tests/testIsPageLoaded.spec.js +277 -0
- package/dist/ai/tools/index.js +48 -0
- package/dist/ai/types/errors.js +67 -0
- package/dist/ai/types/models.js +45 -0
- package/dist/ai/types/types.js +48 -0
- package/dist/ai/validators.js +167 -0
- package/dist/common/Logger/index.js +60 -0
- package/dist/common/Logger/types.js +5 -0
- package/dist/common/SdkError.js +50 -0
- package/dist/common/aiModelsValidations.js +32 -0
- package/dist/common/browser_scripts.js +2596 -0
- package/dist/common/ensureBrowserScripts.js +18 -0
- package/dist/common/extendedTest.js +148 -0
- package/dist/common/extractionHelpers.js +19 -0
- package/dist/common/formatZodError.js +18 -0
- package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
- package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
- package/dist/common/fuzzySearch/utils.js +23 -0
- package/dist/common/getModelProvider.js +18 -0
- package/dist/common/getSimplifiedHtml.js +122 -0
- package/dist/common/hashObject.js +32 -0
- package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
- package/dist/common/html2markdown/index.js +19 -0
- package/dist/common/jwtTokenManager.js +18 -0
- package/dist/common/loadRuntime.js +16 -0
- package/dist/common/locatorHelpers.js +41 -0
- package/dist/common/matching/collectStrings.js +32 -0
- package/dist/common/matching/levenshtein.js +40 -0
- package/dist/common/matching/matching.js +317 -0
- package/dist/common/matching/types.js +1 -0
- package/dist/common/noEmpty.js +9 -0
- package/dist/common/saveSnapshotWithExamples.js +60 -0
- package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
- package/dist/common/xpathMapping.js +107 -0
- package/dist/helpers/clickUntilExhausted.js +85 -0
- package/dist/helpers/downloadFile.js +125 -0
- package/dist/helpers/export.d.js +5 -0
- package/dist/helpers/export.d.ts +1220 -0
- package/dist/helpers/extractMarkdown.js +35 -0
- package/dist/helpers/filterEmptyValues.js +54 -0
- package/dist/helpers/gotoUrl.js +98 -0
- package/dist/helpers/index.d.ts +1220 -0
- package/dist/helpers/index.js +128 -0
- package/dist/helpers/processDate.js +25 -0
- package/dist/helpers/resolveUrl.js +64 -0
- package/dist/helpers/sanitizeHtml.js +74 -0
- package/dist/helpers/saveFileToS3.js +50 -0
- package/dist/helpers/scrollToLoadContent.js +57 -0
- package/dist/helpers/tests/extendedTest.js +130 -0
- package/dist/helpers/tests/testClickUntilExhausted.spec.js +387 -0
- package/dist/helpers/tests/testDownloadFile.spec.js +204 -0
- package/dist/helpers/tests/testExtractMarkdown.spec.js +290 -0
- package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
- package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
- package/dist/helpers/tests/testProcessDate.spec.js +13 -0
- package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
- package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
- package/dist/helpers/tests/testScrollToLoadContent.spec.js +163 -0
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +342 -0
- package/dist/helpers/tests/testWithDomSettledWait.spec.js +164 -0
- package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +114 -0
- package/dist/helpers/types/Attachment.js +115 -0
- package/dist/helpers/types/CustomTypeRegistry.js +48 -0
- package/dist/helpers/types/RunEnvironment.js +18 -0
- package/dist/helpers/types/ValidationError.js +17 -0
- package/dist/helpers/types/index.js +51 -0
- package/dist/helpers/uploadFileToS3.js +154 -0
- package/dist/helpers/utils/getS3Client.js +22 -0
- package/dist/helpers/utils/index.js +73 -0
- package/dist/helpers/utils/isDownload.js +10 -0
- package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
- package/dist/helpers/utils/isLocator.js +9 -0
- package/dist/helpers/utils/jwtTokenManager.js +18 -0
- package/dist/helpers/validateDataUsingSchema.js +103 -0
- package/dist/helpers/waitForDomSettled.js +90 -0
- package/dist/helpers/withNetworkSettledWait.js +91 -0
- package/dist/index.d.js +16 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +16 -0
- package/dist/intunedServices/ApiGateway/aiApiGateway.js +99 -0
- package/dist/intunedServices/ApiGateway/factory.js +13 -0
- package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
- package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
- package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
- package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +224 -0
- package/dist/intunedServices/ApiGateway/types.js +11 -0
- package/dist/intunedServices/cache/cache.js +61 -0
- package/dist/intunedServices/cache/index.js +12 -0
- package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
- package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
- package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +135 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +132 -0
- package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
- package/dist/optimized-extractors/common/findTableHeaders.js +162 -0
- package/dist/optimized-extractors/common/index.js +55 -0
- package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +84 -0
- package/dist/optimized-extractors/common/matching/matching.js +212 -0
- package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
- package/dist/optimized-extractors/common/matching/types.js +18 -0
- package/dist/optimized-extractors/common/matching/utils.js +184 -0
- package/dist/optimized-extractors/common/utils.js +58 -0
- package/dist/optimized-extractors/export.d.js +5 -0
- package/dist/optimized-extractors/export.d.ts +397 -0
- package/dist/optimized-extractors/extractArray.js +120 -0
- package/dist/optimized-extractors/extractObject.js +104 -0
- package/dist/optimized-extractors/index.d.ts +397 -0
- package/dist/optimized-extractors/index.js +31 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +312 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
- package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +152 -0
- package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
- package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
- package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +240 -0
- package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
- package/dist/optimized-extractors/models/anthropicModel.js +23 -0
- package/dist/optimized-extractors/models/openaiModel.js +23 -0
- package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
- package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
- package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
- package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
- package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
- package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
- package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
- package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
- package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
- package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
- package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
- package/dist/optimized-extractors/types/errors.js +42 -0
- package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
- package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
- package/dist/optimized-extractors/types/types.js +5 -0
- package/dist/optimized-extractors/validators.js +152 -0
- package/dist/vite-env.d.js +1 -0
- package/dist/vite-env.d.ts +9 -0
- package/docs.md +14 -0
- package/generated-docs/ai/functions/extractStructuredData.mdx +255 -0
- package/generated-docs/ai/functions/isPageLoaded.mdx +88 -0
- package/generated-docs/ai/interfaces/ArraySchema.mdx +36 -0
- package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
- package/generated-docs/ai/interfaces/BooleanSchema.mdx +28 -0
- package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
- package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
- package/generated-docs/ai/interfaces/NumberSchema.mdx +35 -0
- package/generated-docs/ai/interfaces/ObjectSchema.mdx +39 -0
- package/generated-docs/ai/interfaces/StringSchema.mdx +35 -0
- package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
- package/generated-docs/ai/type-aliases/ContentItem.mdx +12 -0
- package/generated-docs/ai/type-aliases/JsonSchema.mdx +47 -0
- package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +85 -0
- package/generated-docs/helpers/functions/downloadFile.mdx +99 -0
- package/generated-docs/helpers/functions/extractMarkdown.mdx +56 -0
- package/generated-docs/helpers/functions/filterEmptyValues.mdx +51 -0
- package/generated-docs/helpers/functions/goToUrl.mdx +124 -0
- package/generated-docs/helpers/functions/processDate.mdx +55 -0
- package/generated-docs/helpers/functions/resolveUrl.mdx +165 -0
- package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
- package/generated-docs/helpers/functions/saveFileToS3.mdx +127 -0
- package/generated-docs/helpers/functions/scrollToLoadContent.mdx +89 -0
- package/generated-docs/helpers/functions/uploadFileToS3.mdx +121 -0
- package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +90 -0
- package/generated-docs/helpers/functions/waitForDomSettled.mdx +91 -0
- package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +76 -0
- package/generated-docs/helpers/interfaces/Attachment.mdx +56 -0
- package/generated-docs/helpers/interfaces/S3Configs.mdx +52 -0
- package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
- package/generated-docs/helpers/type-aliases/AttachmentType.mdx +10 -0
- package/generated-docs/helpers/type-aliases/FileType.mdx +61 -0
- package/generated-docs/helpers/type-aliases/Trigger.mdx +62 -0
- package/how-to-run-tests.md +10 -0
- package/intuned-runtime-setup.md +13 -0
- package/package.json +119 -0
- package/tsconfig.eslint.json +5 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.strategySchema = exports.simpleObjectJsonSchema = exports.simpleArrayItemJsonSchema = exports.extractObjectOptimizedInputSchema = exports.extractArrayOptimizedInputSchema = void 0;
|
|
7
|
+
var _zod = require("zod");
|
|
8
|
+
var _aiModelsValidation = require("./types/aiModelsValidation");
|
|
9
|
+
const htmlStrategySchema = _zod.z.object({
|
|
10
|
+
model: _zod.z.enum(_aiModelsValidation.SUPPORTED_TEXT_MODELS, {
|
|
11
|
+
required_error: "strategy model is required",
|
|
12
|
+
invalid_type_error: "strategy model is invalid"
|
|
13
|
+
}),
|
|
14
|
+
type: _zod.z.literal("HTML", {
|
|
15
|
+
required_error: "strategy type is required",
|
|
16
|
+
invalid_type_error: "strategy type is invalid"
|
|
17
|
+
})
|
|
18
|
+
});
|
|
19
|
+
const imageStrategySchema = _zod.z.object({
|
|
20
|
+
model: _zod.z.enum(_aiModelsValidation.SUPPORTED_VISION_MODELS, {
|
|
21
|
+
required_error: "strategy model is required",
|
|
22
|
+
invalid_type_error: "strategy model is invalid"
|
|
23
|
+
}),
|
|
24
|
+
type: _zod.z.literal("IMAGE", {
|
|
25
|
+
required_error: "strategy type is required",
|
|
26
|
+
invalid_type_error: "strategy type is invalid"
|
|
27
|
+
})
|
|
28
|
+
});
|
|
29
|
+
const simpleStringSchema = _zod.z.object({
|
|
30
|
+
type: _zod.z.literal("string", {
|
|
31
|
+
required_error: "property type is required",
|
|
32
|
+
invalid_type_error: "optimized extractors only support string types"
|
|
33
|
+
}),
|
|
34
|
+
description: _zod.z.string({
|
|
35
|
+
required_error: "property description is required",
|
|
36
|
+
invalid_type_error: "property description must be a string"
|
|
37
|
+
}).optional()
|
|
38
|
+
});
|
|
39
|
+
const simpleArrayStringSchema = simpleStringSchema.extend({
|
|
40
|
+
primary: _zod.z.boolean().optional()
|
|
41
|
+
});
|
|
42
|
+
const simpleObjectJsonSchema = exports.simpleObjectJsonSchema = _zod.z.object({
|
|
43
|
+
type: _zod.z.literal("object", {
|
|
44
|
+
errorMap: () => ({
|
|
45
|
+
message: 'schema type is required, and must have the value "object"'
|
|
46
|
+
})
|
|
47
|
+
}),
|
|
48
|
+
description: _zod.z.string().optional(),
|
|
49
|
+
properties: _zod.z.record(_zod.z.string(), simpleStringSchema, {
|
|
50
|
+
required_error: "properties is required in object schemas"
|
|
51
|
+
}),
|
|
52
|
+
required: _zod.z.array(_zod.z.string(), {
|
|
53
|
+
required_error: "required must be an array of strings",
|
|
54
|
+
invalid_type_error: "required must be an array of strings"
|
|
55
|
+
}).min(1, {
|
|
56
|
+
message: "at least one property must be required"
|
|
57
|
+
})
|
|
58
|
+
}, {
|
|
59
|
+
required_error: "schema is required"
|
|
60
|
+
}).refine(data => {
|
|
61
|
+
return data.required.every(key => Object.keys(data.properties).includes(key));
|
|
62
|
+
}, {
|
|
63
|
+
message: "All required keys must be defined in the properties object"
|
|
64
|
+
}).refine(data => {
|
|
65
|
+
return Object.keys(data.properties).length > 0;
|
|
66
|
+
}, {
|
|
67
|
+
message: "you must have at least one property in the properties object"
|
|
68
|
+
});
|
|
69
|
+
const simpleArrayItemJsonSchema = exports.simpleArrayItemJsonSchema = _zod.z.object({
|
|
70
|
+
type: _zod.z.literal("object", {
|
|
71
|
+
errorMap: () => ({
|
|
72
|
+
message: 'schema type is required, and must have the value "object"'
|
|
73
|
+
})
|
|
74
|
+
}),
|
|
75
|
+
description: _zod.z.string().optional(),
|
|
76
|
+
properties: _zod.z.record(_zod.z.string(), simpleArrayStringSchema, {
|
|
77
|
+
required_error: "properties is required in object schemas"
|
|
78
|
+
}),
|
|
79
|
+
required: _zod.z.array(_zod.z.string(), {
|
|
80
|
+
required_error: "required must be an array of strings",
|
|
81
|
+
invalid_type_error: "required must be an array of strings"
|
|
82
|
+
}).min(1, {
|
|
83
|
+
message: "at least one property must be required"
|
|
84
|
+
})
|
|
85
|
+
}, {
|
|
86
|
+
required_error: "schema is required"
|
|
87
|
+
}).refine(data => {
|
|
88
|
+
return data.required.every(key => Object.keys(data.properties).includes(key));
|
|
89
|
+
}, {
|
|
90
|
+
message: "All required keys must be defined in the properties object"
|
|
91
|
+
}).refine(data => {
|
|
92
|
+
const primaryField = Object.entries(data.properties).find(([k, v]) => v.primary);
|
|
93
|
+
return primaryField;
|
|
94
|
+
}, {
|
|
95
|
+
message: "you must have one primary property"
|
|
96
|
+
}).refine(data => {
|
|
97
|
+
const primaryField = Object.entries(data.properties).find(([k, v]) => v.primary);
|
|
98
|
+
if (!primaryField) {
|
|
99
|
+
return false;
|
|
100
|
+
}
|
|
101
|
+
return data.required.includes(primaryField[0]);
|
|
102
|
+
}, {
|
|
103
|
+
message: "The primary field must be required"
|
|
104
|
+
});
|
|
105
|
+
const strategySchema = exports.strategySchema = _zod.z.union([htmlStrategySchema, imageStrategySchema], {
|
|
106
|
+
errorMap: (err, context) => {
|
|
107
|
+
if (err.code === "invalid_union" && context.data.model === "gpt3.5-turbo" && context.data.type === "IMAGE") {
|
|
108
|
+
return {
|
|
109
|
+
message: "gpt3.5-turbo does not support IMAGE strategy"
|
|
110
|
+
};
|
|
111
|
+
}
|
|
112
|
+
return {
|
|
113
|
+
message: err.message ?? "invalid strategy configuration"
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
}).optional().default({
|
|
117
|
+
model: "claude-3-haiku",
|
|
118
|
+
type: "HTML"
|
|
119
|
+
});
|
|
120
|
+
const labelSchema = _zod.z.string({
|
|
121
|
+
invalid_type_error: "label must be a string",
|
|
122
|
+
required_error: "label is required"
|
|
123
|
+
}).min(1, "label must be at least 1 character long");
|
|
124
|
+
const entityNameSchema = _zod.z.string().min(1, {
|
|
125
|
+
message: "entity name must be at least 1 character long."
|
|
126
|
+
}).max(50, {
|
|
127
|
+
message: "entity name must be no more than 50 characters long."
|
|
128
|
+
}).regex(/^[a-zA-Z0-9_-]+$/, {
|
|
129
|
+
message: "entity name can only contain letters, digits, underscores, and hyphens."
|
|
130
|
+
});
|
|
131
|
+
const extractObjectOptimizedInputSchema = exports.extractObjectOptimizedInputSchema = _zod.z.object({
|
|
132
|
+
label: labelSchema,
|
|
133
|
+
strategy: strategySchema,
|
|
134
|
+
entityName: entityNameSchema,
|
|
135
|
+
entitySchema: simpleObjectJsonSchema,
|
|
136
|
+
variantKey: _zod.z.string().optional().default("about:blank"),
|
|
137
|
+
prompt: _zod.z.string().optional(),
|
|
138
|
+
optionalPropertiesInvalidator: _zod.z.function().returns(_zod.z.array(_zod.z.string())).optional().default(() => () => []),
|
|
139
|
+
apiKey: _zod.z.string().optional()
|
|
140
|
+
}, {
|
|
141
|
+
required_error: "extractObjectOptimized function missing configurations"
|
|
142
|
+
});
|
|
143
|
+
const extractArrayOptimizedInputSchema = exports.extractArrayOptimizedInputSchema = _zod.z.object({
|
|
144
|
+
label: labelSchema,
|
|
145
|
+
strategy: strategySchema,
|
|
146
|
+
prompt: _zod.z.string().optional(),
|
|
147
|
+
itemEntityName: entityNameSchema,
|
|
148
|
+
itemEntitySchema: simpleArrayItemJsonSchema,
|
|
149
|
+
variantKey: _zod.z.string().optional(),
|
|
150
|
+
optionalPropertiesInvalidator: _zod.z.function().returns(_zod.z.array(_zod.z.string())).optional().default(() => () => []),
|
|
151
|
+
apiKey: _zod.z.string().optional()
|
|
152
|
+
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"use strict";
|
package/docs.md
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
to generate all docs run:
|
|
2
|
+
```bash
|
|
3
|
+
yarn generate-all-docs
|
|
4
|
+
```
|
|
5
|
+
|
|
6
|
+
or run:
|
|
7
|
+
```bash
|
|
8
|
+
yarn generate-docs <input.d.ts> [outputdir]
|
|
9
|
+
```
|
|
10
|
+
These commands will go through the export.d.ts files of the 5 namespaces and generate their docs.
|
|
11
|
+
The script in ./scripts/generate-docs reads the JSDoc comments in the directories and passes them to the markdown converters, then writes functions to outputdir/functions and interfaces to outputdir/interfaces.
|
|
12
|
+
The markdown converter scripts parse JSDoc comments into a Mintlify-compatible format.
|
|
13
|
+
|
|
14
|
+
To write a jsdoc, follow the conventions found in any export.d.ts file, especially the one in `helpers/export.d.ts`.
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: extractStructuredData
|
|
3
|
+
description: ""
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
<Tabs>
|
|
7
|
+
|
|
8
|
+
<Tab title="From Page or Locator">
|
|
9
|
+
|
|
10
|
+
```typescript
|
|
11
|
+
export declare function extractStructuredData(options: {
|
|
12
|
+
source: Page | Locator;
|
|
13
|
+
dataSchema: JsonSchema | z.ZodSchema;
|
|
14
|
+
prompt?: string;
|
|
15
|
+
strategy?: "IMAGE" | "MARKDOWN" | "HTML";
|
|
16
|
+
enableDomMatching?: boolean;
|
|
17
|
+
enableCache?: boolean;
|
|
18
|
+
maxRetries?: number;
|
|
19
|
+
model?: SUPPORTED_MODELS;
|
|
20
|
+
apiKey?: string;
|
|
21
|
+
}): Promise<any>;
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Extract structured data from web pages using AI-powered content analysis.
|
|
25
|
+
|
|
26
|
+
## Examples
|
|
27
|
+
|
|
28
|
+
<CodeGroup>
|
|
29
|
+
|
|
30
|
+
```typescript Page source
|
|
31
|
+
import { extractStructuredData } from '@intuned/browser/ai';
|
|
32
|
+
export default async function handler(params, page, context){
|
|
33
|
+
await page.goto("https://books.toscrape.com/")
|
|
34
|
+
const product = await extractStructuredData({
|
|
35
|
+
source: page,
|
|
36
|
+
strategy: "HTML",
|
|
37
|
+
model: "gpt-4o",
|
|
38
|
+
dataSchema: {
|
|
39
|
+
type: "object",
|
|
40
|
+
properties: {
|
|
41
|
+
name: { type: "string" },
|
|
42
|
+
price: { type: "string" },
|
|
43
|
+
description: { type: "string" },
|
|
44
|
+
inStock: { type: "boolean" }
|
|
45
|
+
},
|
|
46
|
+
required: ["name", "price"]
|
|
47
|
+
},
|
|
48
|
+
prompt: "Extract product details from this e-commerce page"
|
|
49
|
+
});
|
|
50
|
+
console.log(`Found book: ${product.name} - ${product.price}`);
|
|
51
|
+
}
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
```typescript Locator source
|
|
55
|
+
import { extractStructuredData } from '@intuned/browser/ai';
|
|
56
|
+
export default async function handler(params, page, context){
|
|
57
|
+
await page.goto("https://books.toscrape.com/")
|
|
58
|
+
const articleContainer = page.locator("article").first()
|
|
59
|
+
const article = await extractStructuredData({
|
|
60
|
+
source: articleContainer,
|
|
61
|
+
strategy: "MARKDOWN",
|
|
62
|
+
model: "claude-3",
|
|
63
|
+
dataSchema: {
|
|
64
|
+
type: "object",
|
|
65
|
+
properties: {
|
|
66
|
+
title: { type: "string" },
|
|
67
|
+
author: { type: "string" },
|
|
68
|
+
publishDate: { type: "string" },
|
|
69
|
+
content: { type: "string" },
|
|
70
|
+
},
|
|
71
|
+
required: ["title"]
|
|
72
|
+
},
|
|
73
|
+
maxRetries: 5
|
|
74
|
+
});
|
|
75
|
+
console.log(`Found book: ${article.title}`);
|
|
76
|
+
}
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
</CodeGroup>
|
|
80
|
+
|
|
81
|
+
## Arguments
|
|
82
|
+
|
|
83
|
+
<ParamField path="options" type="Object" required
|
|
84
|
+
>
|
|
85
|
+
Configuration object containing extraction parameters
|
|
86
|
+
|
|
87
|
+
<Expandable title="options">
|
|
88
|
+
<ParamField path="options.source" type="Page | Locator">
|
|
89
|
+
Playwright Page object to extract data from the entire page or Locator object to extract data from a specific element
|
|
90
|
+
</ParamField>
|
|
91
|
+
|
|
92
|
+
<ParamField path="options.dataSchema" type="JsonSchema | z.ZodSchema">
|
|
93
|
+
Schema defining the structure of the data to extract. This can be a JsonSchema or a ZodSchema
|
|
94
|
+
</ParamField>
|
|
95
|
+
|
|
96
|
+
<ParamField path="options.strategy" type="string">
|
|
97
|
+
Type of extraction: "HTML", "IMAGE", or "MARKDOWN". Defaults to "HTML"
|
|
98
|
+
</ParamField>
|
|
99
|
+
|
|
100
|
+
<ParamField path="options.prompt" type="string">
|
|
101
|
+
Optional prompt to guide the extraction process and provide more context
|
|
102
|
+
</ParamField>
|
|
103
|
+
|
|
104
|
+
<ParamField path="options.enableDomMatching" type="boolean">
|
|
105
|
+
Whether to enable DOM element matching during extraction. Defaults to false. When set to true, all types in the schema must be strings to match with the DOM elements. The extracted results will be matched with the DOM elements and returned, then cached in a smart fashion so that the next time the same data is extracted, the result will be returned from the cache even if the DOM has minor changes.
|
|
106
|
+
</ParamField>
|
|
107
|
+
|
|
108
|
+
<ParamField path="options.enableCache" type="boolean">
|
|
109
|
+
Whether to enable caching of the extracted data. Defaults to true
|
|
110
|
+
</ParamField>
|
|
111
|
+
|
|
112
|
+
<ParamField path="options.maxRetries" type="number">
|
|
113
|
+
Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
|
|
114
|
+
</ParamField>
|
|
115
|
+
|
|
116
|
+
<ParamField path="options.model" type="SUPPORTED_MODELS">
|
|
117
|
+
AI model to use for extraction. See [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models. Defaults to "claude-3-5-haiku-latest"
|
|
118
|
+
</ParamField>
|
|
119
|
+
|
|
120
|
+
<ParamField path="options.apiKey" type="string">
|
|
121
|
+
Optional API key for AI extraction (if provided, will not be billed to your account)
|
|
122
|
+
</ParamField>
|
|
123
|
+
|
|
124
|
+
</Expandable>
|
|
125
|
+
|
|
126
|
+
</ParamField>
|
|
127
|
+
|
|
128
|
+
## Returns: `any`
|
|
129
|
+
|
|
130
|
+
Promise resolving to the extracted structured data matching the provided schema
|
|
131
|
+
|
|
132
|
+
</Tab>
|
|
133
|
+
|
|
134
|
+
<Tab title="From Content">
|
|
135
|
+
|
|
136
|
+
```typescript
|
|
137
|
+
export declare function extractStructuredData(options: {
|
|
138
|
+
content: ContentItem[] | ContentItem;
|
|
139
|
+
dataSchema: JsonSchema | z.ZodSchema;
|
|
140
|
+
prompt?: string;
|
|
141
|
+
maxRetries?: number;
|
|
142
|
+
enableCache?: boolean;
|
|
143
|
+
model: SUPPORTED_MODELS;
|
|
144
|
+
apiKey?: string;
|
|
145
|
+
}): Promise<any>;
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Extract structured data from content items (text, images) using AI-powered analysis.
|
|
149
|
+
|
|
150
|
+
## Examples
|
|
151
|
+
|
|
152
|
+
<CodeGroup>
|
|
153
|
+
|
|
154
|
+
```typescript Text Content
|
|
155
|
+
import { extractStructuredData } from '@intuned/browser/ai';
|
|
156
|
+
export default async function handler(params, page, context){
|
|
157
|
+
const textContent: TextContentItem = {
|
|
158
|
+
type: "text",
|
|
159
|
+
data: "John Doe, age 30, works as a Software Engineer at Tech Corp"
|
|
160
|
+
};
|
|
161
|
+
|
|
162
|
+
const person = await extractStructuredData({
|
|
163
|
+
content: textContent,
|
|
164
|
+
model: "gpt-4o",
|
|
165
|
+
dataSchema: {
|
|
166
|
+
type: "object",
|
|
167
|
+
properties: {
|
|
168
|
+
name: { type: "string" },
|
|
169
|
+
age: { type: "number" },
|
|
170
|
+
occupation: { type: "string" },
|
|
171
|
+
company: { type: "string" }
|
|
172
|
+
},
|
|
173
|
+
required: ["name"]
|
|
174
|
+
},
|
|
175
|
+
prompt: "Extract person information from the text"
|
|
176
|
+
});
|
|
177
|
+
|
|
178
|
+
console.log(`Found person: ${person.name}, ${person.age} years old`);
|
|
179
|
+
}
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
```typescript Multiple Content Items
|
|
183
|
+
import { extractStructuredData } from '@intuned/browser/ai';
|
|
184
|
+
export default async function handler(params, page, context){
|
|
185
|
+
const mixedContent = [
|
|
186
|
+
{ type: "text", data: "Product: iPhone 15" },
|
|
187
|
+
{ type: "image-url", image_type: "jpeg", data: "https://mintcdn.com/intuned-7/asXJUUPBWwDlStUB/logo/light.svg?fit=max&auto=format&n=asXJUUPBWwDlStUB&q=85&s=6525c0b299b3226464eba6afa9b7ebe6" }
|
|
188
|
+
];
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
const product = await extractStructuredData({
|
|
192
|
+
content: mixedContent,
|
|
193
|
+
model: "claude-3",
|
|
194
|
+
dataSchema: {
|
|
195
|
+
type: "object",
|
|
196
|
+
properties: {
|
|
197
|
+
name: { type: "string" },
|
|
198
|
+
price: { type: "string" },
|
|
199
|
+
features: { type: "array", items: { type: "string" } }
|
|
200
|
+
}
|
|
201
|
+
},
|
|
202
|
+
maxRetries: 1,
|
|
203
|
+
enableCache: true
|
|
204
|
+
});
|
|
205
|
+
}
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
</CodeGroup>
|
|
209
|
+
|
|
210
|
+
## Arguments
|
|
211
|
+
|
|
212
|
+
<ParamField path="options" type="Object" required
|
|
213
|
+
>
|
|
214
|
+
Configuration object containing extraction parameters
|
|
215
|
+
|
|
216
|
+
<Expandable title="options">
|
|
217
|
+
<ParamField path="options.content" type="Array<ContentItem> | ContentItem">
|
|
218
|
+
Content to extract data from - can be a single content item or array of content items
|
|
219
|
+
</ParamField>
|
|
220
|
+
|
|
221
|
+
<ParamField path="options.dataSchema" type="JsonSchema | z.ZodSchema">
|
|
222
|
+
Schema defining the structure of the data to extract. This can be a JsonSchema or a ZodSchema
|
|
223
|
+
</ParamField>
|
|
224
|
+
|
|
225
|
+
<ParamField path="options.prompt" type="string">
|
|
226
|
+
Optional prompt to guide the extraction process and provide more context
|
|
227
|
+
</ParamField>
|
|
228
|
+
|
|
229
|
+
<ParamField path="options.enableCache" type="boolean">
|
|
230
|
+
Whether to enable caching of the extracted data. Defaults to true
|
|
231
|
+
</ParamField>
|
|
232
|
+
|
|
233
|
+
<ParamField path="options.maxRetries" type="number">
|
|
234
|
+
Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
|
|
235
|
+
</ParamField>
|
|
236
|
+
|
|
237
|
+
<ParamField path="options.model" type="SUPPORTED_MODELS">
|
|
238
|
+
AI model to use for extraction (e.g., "gpt-4", "claude-3"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models
|
|
239
|
+
</ParamField>
|
|
240
|
+
|
|
241
|
+
<ParamField path="options.apiKey" type="string">
|
|
242
|
+
Optional API key for AI extraction (if provided, will not be billed to your account)
|
|
243
|
+
</ParamField>
|
|
244
|
+
|
|
245
|
+
</Expandable>
|
|
246
|
+
|
|
247
|
+
</ParamField>
|
|
248
|
+
|
|
249
|
+
## Returns: `any`
|
|
250
|
+
|
|
251
|
+
Promise resolving to the extracted structured data matching the provided schema
|
|
252
|
+
|
|
253
|
+
</Tab>
|
|
254
|
+
|
|
255
|
+
</Tabs>
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: isPageLoaded
|
|
3
|
+
description: ""
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
```typescript
|
|
7
|
+
export declare function isPageLoaded(input: {
|
|
8
|
+
page: Page;
|
|
9
|
+
timeoutInMs?: number;
|
|
10
|
+
model?: SUPPORTED_MODELS;
|
|
11
|
+
apiKey?: string;
|
|
12
|
+
}): Promise<boolean>;
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Uses AI vision to determine if a webpage has finished loading by analyzing a screenshot.
|
|
16
|
+
Detects loading spinners, blank content, or incomplete page states.
|
|
17
|
+
|
|
18
|
+
## Examples
|
|
19
|
+
|
|
20
|
+
<CodeGroup>
|
|
21
|
+
|
|
22
|
+
```typescript Check Page Loading
|
|
23
|
+
import { isPageLoaded } from "@intuned/browser/ai";
|
|
24
|
+
export default async function handler(params, page, context){
|
|
25
|
+
// Navigate, then check whether the page has finished loading
|
|
26
|
+
await page.goto('https://example.com');
|
|
27
|
+
|
|
28
|
+
const pageLoaded = await isPageLoaded({page});
|
|
29
|
+
if (pageLoaded) {
|
|
30
|
+
// Continue with scraping or interactions
|
|
31
|
+
} else {
|
|
32
|
+
// Wait longer or retry
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
```typescript Loading Loop
|
|
38
|
+
import { isPageLoaded } from "@intuned/browser/ai";
|
|
39
|
+
export default async function handler(params, page, context){
|
|
40
|
+
// Keep checking until page loads
|
|
41
|
+
await page.goto("https://example.com");
|
|
42
|
+
let attempts = 0;
|
|
43
|
+
while (attempts < 10) {
|
|
44
|
+
const pageLoaded = await isPageLoaded({
|
|
45
|
+
page,
|
|
46
|
+
model: "gpt-4o",
|
|
47
|
+
timeoutInMs: 5000
|
|
48
|
+
});
|
|
49
|
+
if (pageLoaded) break;
|
|
50
|
+
|
|
51
|
+
await page.waitForTimeout(2000);
|
|
52
|
+
attempts++;
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
</CodeGroup>
|
|
58
|
+
|
|
59
|
+
## Arguments
|
|
60
|
+
|
|
61
|
+
<ParamField path="input" type="Object" required
|
|
62
|
+
>
|
|
63
|
+
Input object containing the page to check
|
|
64
|
+
|
|
65
|
+
<Expandable title="input">
|
|
66
|
+
<ParamField path="input.page" type="Page">
|
|
67
|
+
The Playwright page to check
|
|
68
|
+
</ParamField>
|
|
69
|
+
|
|
70
|
+
<ParamField path="input.timeoutInMs" type="number">
|
|
71
|
+
Screenshot timeout in milliseconds. Defaults to 10000
|
|
72
|
+
</ParamField>
|
|
73
|
+
|
|
74
|
+
<ParamField path="input.model" type="SUPPORTED_MODELS">
|
|
75
|
+
AI model to use for the check. See [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models. Defaults to "gpt-4o-2024-08-06"
|
|
76
|
+
</ParamField>
|
|
77
|
+
|
|
78
|
+
<ParamField path="input.apiKey" type="string">
|
|
79
|
+
Optional API key for the AI service (if provided, will not be billed to your account)
|
|
80
|
+
</ParamField>
|
|
81
|
+
|
|
82
|
+
</Expandable>
|
|
83
|
+
|
|
84
|
+
</ParamField>
|
|
85
|
+
|
|
86
|
+
## Returns: `Promise<boolean>`
|
|
87
|
+
|
|
88
|
+
Promise resolving to `true` if the page is loaded, or `false` if it is still loading.
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: ArraySchema
|
|
3
|
+
description: ""
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
```typescript
|
|
7
|
+
export interface ArraySchema extends BasicSchema {
|
|
8
|
+
type: "array";
|
|
9
|
+
items: JsonSchema | z.ZodSchema;
|
|
10
|
+
maxItems?: number;
|
|
11
|
+
minItems?: number;
|
|
12
|
+
uniqueItems?: boolean;
|
|
13
|
+
}
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
Schema definition for array values with item validation and constraints.
|
|
17
|
+
|
|
18
|
+
## Examples
|
|
19
|
+
|
|
20
|
+
<CodeGroup>
|
|
21
|
+
|
|
22
|
+
```typescript Array Schema
|
|
23
|
+
import { ArraySchema } from "@intuned/browser/ai";
|
|
24
|
+
export default async function handler(params, page, context){
|
|
25
|
+
const tagsSchema: ArraySchema = {
|
|
26
|
+
type: "array",
|
|
27
|
+
items: { type: "string" },
|
|
28
|
+
minItems: 1,
|
|
29
|
+
maxItems: 10,
|
|
30
|
+
uniqueItems: true,
|
|
31
|
+
description: "List of tags"
|
|
32
|
+
};
|
|
33
|
+
}
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
</CodeGroup>
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: BasicSchema
|
|
3
|
+
description: ""
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
```typescript
|
|
7
|
+
export interface BasicSchema {
|
|
8
|
+
type: "string" | "number" | "integer" | "boolean" | "array" | "object";
|
|
9
|
+
description?: string;
|
|
10
|
+
}
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Base schema interface that all JSON schema types extend from.
|
|
14
|
+
Provides common properties like type and description.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: BooleanSchema
|
|
3
|
+
description: ""
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
```typescript
|
|
7
|
+
export interface BooleanSchema extends BasicSchema {
|
|
8
|
+
type: "boolean";
|
|
9
|
+
}
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
Schema definition for boolean values.
|
|
13
|
+
|
|
14
|
+
## Examples
|
|
15
|
+
|
|
16
|
+
<CodeGroup>
|
|
17
|
+
|
|
18
|
+
```typescript Boolean Schema
|
|
19
|
+
import { BooleanSchema } from "@intuned/browser/ai";
|
|
20
|
+
export default async function handler(params, page, context){
|
|
21
|
+
const isActiveSchema: BooleanSchema = {
|
|
22
|
+
type: "boolean",
|
|
23
|
+
description: "Whether the user account is active"
|
|
24
|
+
};
|
|
25
|
+
}
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
</CodeGroup>
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: ImageBufferContentItem
|
|
3
|
+
description: ""
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
```typescript
|
|
7
|
+
export interface ImageBufferContentItem {
|
|
8
|
+
type: "image-buffer";
|
|
9
|
+
image_type: "png" | "jpeg" | "gif" | "webp";
|
|
10
|
+
data: Buffer;
|
|
11
|
+
}
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Represents image content provided as a Buffer for AI extraction.
|
|
15
|
+
Used when passing image data directly to extractStructuredData without a page source.
|
|
16
|
+
The image will be analyzed by AI vision models for data extraction.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: ImageUrlContentItem
|
|
3
|
+
description: ""
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
```typescript
|
|
7
|
+
export interface ImageUrlContentItem {
|
|
8
|
+
type: "image-url";
|
|
9
|
+
image_type: "png" | "jpeg" | "gif" | "webp";
|
|
10
|
+
data: string;
|
|
11
|
+
}
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Represents image content provided as a URL for AI extraction.
|
|
15
|
+
Used when passing image URLs directly to extractStructuredData without a page source.
|
|
16
|
+
The image will be fetched from the URL and analyzed by AI vision models for data extraction.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: NumberSchema
|
|
3
|
+
description: ""
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
```typescript
|
|
7
|
+
export interface NumberSchema extends BasicSchema {
|
|
8
|
+
type: "number" | "integer";
|
|
9
|
+
multipleOf?: number;
|
|
10
|
+
maximum?: number;
|
|
11
|
+
exclusiveMaximum?: number;
|
|
12
|
+
minimum?: number;
|
|
13
|
+
exclusiveMinimum?: number;
|
|
14
|
+
}
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Schema definition for numeric values (numbers and integers) with validation constraints.
|
|
18
|
+
|
|
19
|
+
## Examples
|
|
20
|
+
|
|
21
|
+
<CodeGroup>
|
|
22
|
+
|
|
23
|
+
```typescript Number Schema
|
|
24
|
+
import { NumberSchema } from "@intuned/browser/ai";
|
|
25
|
+
export default async function handler(params, page, context){
|
|
26
|
+
const ageSchema: NumberSchema = {
|
|
27
|
+
type: "integer",
|
|
28
|
+
minimum: 0,
|
|
29
|
+
maximum: 150,
|
|
30
|
+
description: "Person's age in years"
|
|
31
|
+
};
|
|
32
|
+
}
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
</CodeGroup>
|