@intuned/browser-dev 0.1.4-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc +21 -0
- package/.eslintignore +10 -0
- package/.eslintrc.js +39 -0
- package/BROWSER_SCRIPTS_SETUP.md +84 -0
- package/LICENSE +43 -0
- package/README.md +160 -0
- package/RELEASE.md +60 -0
- package/dist/ai/export.d.js +5 -0
- package/dist/ai/export.d.ts +641 -0
- package/dist/ai/extractStructuredData.js +320 -0
- package/dist/ai/extractStructuredDataUsingAi.js +142 -0
- package/dist/ai/extractionHelpers/screenshotHelpers.js +56 -0
- package/dist/ai/extractionHelpers/validateSchema.js +148 -0
- package/dist/ai/index.d.ts +641 -0
- package/dist/ai/index.js +19 -0
- package/dist/ai/isPageLoaded.js +80 -0
- package/dist/ai/prompt.js +39 -0
- package/dist/ai/tests/testCheckAllTypesAreStrings.spec.js +137 -0
- package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
- package/dist/ai/tests/testExtractStructuredData.spec.js +646 -0
- package/dist/ai/tests/testIsPageLoaded.spec.js +277 -0
- package/dist/ai/tools/index.js +48 -0
- package/dist/ai/types/errors.js +67 -0
- package/dist/ai/types/models.js +45 -0
- package/dist/ai/types/types.js +48 -0
- package/dist/ai/validators.js +167 -0
- package/dist/common/Logger/index.js +60 -0
- package/dist/common/Logger/types.js +5 -0
- package/dist/common/SdkError.js +50 -0
- package/dist/common/aiModelsValidations.js +32 -0
- package/dist/common/ensureBrowserScripts.js +14 -0
- package/dist/common/extendedTest.js +157 -0
- package/dist/common/extractionHelpers.js +19 -0
- package/dist/common/formatZodError.js +18 -0
- package/dist/common/fuzzySearch/fuzzySearch.test.js +250 -0
- package/dist/common/fuzzySearch/levenshtein-search.js +298 -0
- package/dist/common/fuzzySearch/utils.js +23 -0
- package/dist/common/getModelProvider.js +18 -0
- package/dist/common/getSimplifiedHtml.js +122 -0
- package/dist/common/hashObject.js +32 -0
- package/dist/common/html2markdown/convertElementToMarkdown.js +469 -0
- package/dist/common/html2markdown/index.js +19 -0
- package/dist/common/jwtTokenManager.js +57 -0
- package/dist/common/loadRuntime.js +16 -0
- package/dist/common/locatorHelpers.js +41 -0
- package/dist/common/matching/collectStrings.js +32 -0
- package/dist/common/matching/levenshtein.js +40 -0
- package/dist/common/matching/matching.js +317 -0
- package/dist/common/matching/types.js +1 -0
- package/dist/common/noEmpty.js +9 -0
- package/dist/common/saveSnapshotWithExamples.js +60 -0
- package/dist/common/script.js +2602 -0
- package/dist/common/tests/testEnsureBrowserScript.spec.js +31 -0
- package/dist/common/xpathMapping.js +107 -0
- package/dist/helpers/clickUntilExhausted.js +85 -0
- package/dist/helpers/downloadFile.js +125 -0
- package/dist/helpers/export.d.js +5 -0
- package/dist/helpers/export.d.ts +1220 -0
- package/dist/helpers/extractMarkdown.js +35 -0
- package/dist/helpers/filterEmptyValues.js +54 -0
- package/dist/helpers/gotoUrl.js +98 -0
- package/dist/helpers/index.d.ts +1220 -0
- package/dist/helpers/index.js +122 -0
- package/dist/helpers/processDate.js +25 -0
- package/dist/helpers/resolveUrl.js +64 -0
- package/dist/helpers/sanitizeHtml.js +74 -0
- package/dist/helpers/saveFileToS3.js +50 -0
- package/dist/helpers/scrollToLoadContent.js +57 -0
- package/dist/helpers/tests/testClickUntilExhausted.spec.js +372 -0
- package/dist/helpers/tests/testDownloadFile.spec.js +206 -0
- package/dist/helpers/tests/testExtractMarkdown.spec.js +290 -0
- package/dist/helpers/tests/testFilterEmptyValues.spec.js +151 -0
- package/dist/helpers/tests/testGoToUrl.spec.js +37 -0
- package/dist/helpers/tests/testProcessDate.spec.js +13 -0
- package/dist/helpers/tests/testResolveUrl.spec.js +341 -0
- package/dist/helpers/tests/testSanitizeHtml.spec.js +330 -0
- package/dist/helpers/tests/testScrollToLoadContent.spec.js +163 -0
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +342 -0
- package/dist/helpers/tests/testWithDomSettledWait.spec.js +164 -0
- package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +114 -0
- package/dist/helpers/types/Attachment.js +115 -0
- package/dist/helpers/types/CustomTypeRegistry.js +48 -0
- package/dist/helpers/types/RunEnvironment.js +18 -0
- package/dist/helpers/types/ValidationError.js +17 -0
- package/dist/helpers/types/index.js +51 -0
- package/dist/helpers/uploadFileToS3.js +154 -0
- package/dist/helpers/utils/getS3Client.js +22 -0
- package/dist/helpers/utils/index.js +73 -0
- package/dist/helpers/utils/isDownload.js +10 -0
- package/dist/helpers/utils/isGenerateCodeMode.js +9 -0
- package/dist/helpers/utils/isLocator.js +9 -0
- package/dist/helpers/utils/jwtTokenManager.js +18 -0
- package/dist/helpers/validateDataUsingSchema.js +103 -0
- package/dist/helpers/waitForDomSettled.js +90 -0
- package/dist/helpers/withNetworkSettledWait.js +91 -0
- package/dist/index.d.js +16 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +16 -0
- package/dist/intunedServices/ApiGateway/aiApiGateway.js +143 -0
- package/dist/intunedServices/ApiGateway/factory.js +16 -0
- package/dist/intunedServices/ApiGateway/providers/Anthropic.js +26 -0
- package/dist/intunedServices/ApiGateway/providers/Gemini.js +29 -0
- package/dist/intunedServices/ApiGateway/providers/OpenAI.js +29 -0
- package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +355 -0
- package/dist/intunedServices/ApiGateway/types.js +11 -0
- package/dist/intunedServices/cache/cache.js +61 -0
- package/dist/intunedServices/cache/index.js +12 -0
- package/dist/intunedServices/cache/tests/testCache.spec.js +117 -0
- package/dist/optimized-extractors/common/buildExamplesPrompt.js +12 -0
- package/dist/optimized-extractors/common/buildImagesFromPage.js +55 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +135 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingGoogle.js +37 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +132 -0
- package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +122 -0
- package/dist/optimized-extractors/common/findTableHeaders.js +162 -0
- package/dist/optimized-extractors/common/index.js +55 -0
- package/dist/optimized-extractors/common/isTableHeaderOrFooter.js +84 -0
- package/dist/optimized-extractors/common/matching/matching.js +212 -0
- package/dist/optimized-extractors/common/matching/matching.test.js +655 -0
- package/dist/optimized-extractors/common/matching/types.js +18 -0
- package/dist/optimized-extractors/common/matching/utils.js +184 -0
- package/dist/optimized-extractors/common/utils.js +58 -0
- package/dist/optimized-extractors/export.d.js +5 -0
- package/dist/optimized-extractors/export.d.ts +397 -0
- package/dist/optimized-extractors/extractArray.js +120 -0
- package/dist/optimized-extractors/extractObject.js +104 -0
- package/dist/optimized-extractors/index.d.ts +397 -0
- package/dist/optimized-extractors/index.js +31 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/dynamicListExtractor.spec.js +269 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/findSetOfXpathsToCreateAnArrayExtractor.test.js +22 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/getContainerElement.test.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/partOfSameArrayXpath.test.js +42 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromLocator.spec.js +146 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/testArrayExtractorFromPage.spec.js +130 -0
- package/dist/optimized-extractors/listExtractionHelpers/__tests__/verifyThatAllXpathsArePartOfSameArray.test.js +9 -0
- package/dist/optimized-extractors/listExtractionHelpers/dynamicListExtractor.js +160 -0
- package/dist/optimized-extractors/listExtractionHelpers/errors.js +46 -0
- package/dist/optimized-extractors/listExtractionHelpers/getListMatches.js +14 -0
- package/dist/optimized-extractors/listExtractionHelpers/runAiExtraction.js +243 -0
- package/dist/optimized-extractors/listExtractionHelpers/typesAndSchema.js +5 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractPropertiesUsingGPTFromArray.js +277 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/extractStructuredListUsingAi.js +44 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getListContainerXpath.js +94 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getRelativeContainerXpathSelector.js +20 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/getSimplifiedHtmlPerListItem.js +21 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/tablesUtils.js +48 -0
- package/dist/optimized-extractors/listExtractionHelpers/utils/validateOptions.js +52 -0
- package/dist/optimized-extractors/models/anthropicModel.js +23 -0
- package/dist/optimized-extractors/models/openaiModel.js +23 -0
- package/dist/optimized-extractors/objectExtractionHelpers/AIExtractors.js +73 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/checksumUtils.test.js +103 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromLocator.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/__tests__/testObjectExtractorFromPage.spec.js +107 -0
- package/dist/optimized-extractors/objectExtractionHelpers/calculateObjectExampleHash.js +28 -0
- package/dist/optimized-extractors/objectExtractionHelpers/captureSnapshot.js +26 -0
- package/dist/optimized-extractors/objectExtractionHelpers/checksumUtils.js +32 -0
- package/dist/optimized-extractors/objectExtractionHelpers/constants.js +7 -0
- package/dist/optimized-extractors/objectExtractionHelpers/dynamicObjectExtractor.js +106 -0
- package/dist/optimized-extractors/objectExtractionHelpers/errors.js +42 -0
- package/dist/optimized-extractors/objectExtractionHelpers/findDomMatches.js +54 -0
- package/dist/optimized-extractors/objectExtractionHelpers/getSimplifiedHtml.js +122 -0
- package/dist/optimized-extractors/objectExtractionHelpers/typesAndSchemas.js +5 -0
- package/dist/optimized-extractors/objectExtractionHelpers/validateDynamicObjectExtractorOptions.js +52 -0
- package/dist/optimized-extractors/types/aiModelsValidation.js +45 -0
- package/dist/optimized-extractors/types/errors.js +42 -0
- package/dist/optimized-extractors/types/jsonSchema.d.js +5 -0
- package/dist/optimized-extractors/types/jsonSchema.d.ts +50 -0
- package/dist/optimized-extractors/types/types.js +5 -0
- package/dist/optimized-extractors/validators.js +152 -0
- package/dist/types/intuned-runtime.d.js +1 -0
- package/dist/types/intuned-runtime.d.ts +64 -0
- package/dist/vite-env.d.js +1 -0
- package/dist/vite-env.d.ts +9 -0
- package/generated-docs/ai/functions/extractStructuredData.mdx +255 -0
- package/generated-docs/ai/functions/isPageLoaded.mdx +88 -0
- package/generated-docs/ai/interfaces/ArraySchema.mdx +36 -0
- package/generated-docs/ai/interfaces/BasicSchema.mdx +14 -0
- package/generated-docs/ai/interfaces/BooleanSchema.mdx +28 -0
- package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
- package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
- package/generated-docs/ai/interfaces/NumberSchema.mdx +35 -0
- package/generated-docs/ai/interfaces/ObjectSchema.mdx +39 -0
- package/generated-docs/ai/interfaces/StringSchema.mdx +35 -0
- package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
- package/generated-docs/ai/type-aliases/ContentItem.mdx +12 -0
- package/generated-docs/ai/type-aliases/JsonSchema.mdx +47 -0
- package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +85 -0
- package/generated-docs/helpers/functions/downloadFile.mdx +99 -0
- package/generated-docs/helpers/functions/extractMarkdown.mdx +56 -0
- package/generated-docs/helpers/functions/filterEmptyValues.mdx +51 -0
- package/generated-docs/helpers/functions/goToUrl.mdx +124 -0
- package/generated-docs/helpers/functions/processDate.mdx +55 -0
- package/generated-docs/helpers/functions/resolveUrl.mdx +165 -0
- package/generated-docs/helpers/functions/sanitizeHtml.mdx +113 -0
- package/generated-docs/helpers/functions/saveFileToS3.mdx +127 -0
- package/generated-docs/helpers/functions/scrollToLoadContent.mdx +89 -0
- package/generated-docs/helpers/functions/uploadFileToS3.mdx +121 -0
- package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +90 -0
- package/generated-docs/helpers/functions/waitForDomSettled.mdx +91 -0
- package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +76 -0
- package/generated-docs/helpers/interfaces/Attachment.mdx +56 -0
- package/generated-docs/helpers/interfaces/S3Configs.mdx +52 -0
- package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +22 -0
- package/generated-docs/helpers/type-aliases/AttachmentType.mdx +10 -0
- package/generated-docs/helpers/type-aliases/FileType.mdx +61 -0
- package/generated-docs/helpers/type-aliases/Trigger.mdx +62 -0
- package/how-to-generate-docs.md +61 -0
- package/how-to-run-tests.md +42 -0
- package/intuned-runtime-setup.md +13 -0
- package/package.json +124 -0
- package/tsconfig.eslint.json +5 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,641 @@
|
|
|
1
|
+
import { Locator, Page } from "playwright-core";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Base schema interface that all JSON schema types extend from.
|
|
6
|
+
* Provides common properties like type and description.
|
|
7
|
+
*
|
|
8
|
+
* @interface BasicSchema
|
|
9
|
+
*/
|
|
10
|
+
export interface BasicSchema {
|
|
11
|
+
/** The JSON schema type(s) for this schema definition */
|
|
12
|
+
type: "string" | "number" | "integer" | "boolean" | "array" | "object";
|
|
13
|
+
/** Optional description of what this schema represents */
|
|
14
|
+
description?: string;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Schema definition for string values with validation constraints.
|
|
19
|
+
*
|
|
20
|
+
* @interface StringSchema
|
|
21
|
+
* @extends BasicSchema
|
|
22
|
+
* @example
|
|
23
|
+
* ```typescript String Schema
|
|
24
|
+
* import { StringSchema } from "@intuned/browser/ai";
|
|
25
|
+
* export default async function handler(params, page, context){
|
|
26
|
+
* const nameSchema: StringSchema = {
|
|
27
|
+
* type: "string",
|
|
28
|
+
* minLength: 2,
|
|
29
|
+
* maxLength: 50,
|
|
30
|
+
* pattern: "^[A-Za-z\\s]+$",
|
|
31
|
+
* description: "Person's full name"
|
|
32
|
+
* };
|
|
33
|
+
* }
|
|
34
|
+
* ```
|
|
35
|
+
*/
|
|
36
|
+
export interface StringSchema extends BasicSchema {
|
|
37
|
+
/** Must be "string" for string schemas */
|
|
38
|
+
type: "string";
|
|
39
|
+
/** Array of allowed string values (enumeration) */
|
|
40
|
+
enum?: string[];
|
|
41
|
+
/** Maximum allowed string length */
|
|
42
|
+
maxLength?: number;
|
|
43
|
+
/** Minimum required string length */
|
|
44
|
+
minLength?: number;
|
|
45
|
+
/** Regular expression pattern the string must match */
|
|
46
|
+
pattern?: string;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Schema definition for numeric values (numbers and integers) with validation constraints.
|
|
51
|
+
*
|
|
52
|
+
* @interface NumberSchema
|
|
53
|
+
* @extends BasicSchema
|
|
54
|
+
* @example
|
|
55
|
+
* ```typescript Number Schema
|
|
56
|
+
* import { NumberSchema } from "@intuned/browser/ai";
|
|
57
|
+
* export default async function handler(params, page, context){
|
|
58
|
+
* const ageSchema: NumberSchema = {
|
|
59
|
+
* type: "integer",
|
|
60
|
+
* minimum: 0,
|
|
61
|
+
* maximum: 150,
|
|
62
|
+
* description: "Person's age in years"
|
|
63
|
+
* };
|
|
64
|
+
* }
|
|
65
|
+
* ```
|
|
66
|
+
*/
|
|
67
|
+
export interface NumberSchema extends BasicSchema {
|
|
68
|
+
/** Must be "number" or "integer" for numeric schemas */
|
|
69
|
+
type: "number" | "integer";
|
|
70
|
+
/** Number must be a multiple of this value */
|
|
71
|
+
multipleOf?: number;
|
|
72
|
+
/** Maximum allowed value (inclusive) */
|
|
73
|
+
maximum?: number;
|
|
74
|
+
/** Maximum allowed value (exclusive) */
|
|
75
|
+
exclusiveMaximum?: number;
|
|
76
|
+
/** Minimum allowed value (inclusive) */
|
|
77
|
+
minimum?: number;
|
|
78
|
+
/** Minimum allowed value (exclusive) */
|
|
79
|
+
exclusiveMinimum?: number;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* Schema definition for boolean values.
|
|
84
|
+
*
|
|
85
|
+
* @interface BooleanSchema
|
|
86
|
+
* @extends BasicSchema
|
|
87
|
+
* @example
|
|
88
|
+
* ```typescript Boolean Schema
|
|
89
|
+
* import { BooleanSchema } from "@intuned/browser/ai";
|
|
90
|
+
* export default async function handler(params, page, context){
|
|
91
|
+
* const isActiveSchema: BooleanSchema = {
|
|
92
|
+
* type: "boolean",
|
|
93
|
+
* description: "Whether the user account is active"
|
|
94
|
+
* };
|
|
95
|
+
* }
|
|
96
|
+
* ```
|
|
97
|
+
*/
|
|
98
|
+
export interface BooleanSchema extends BasicSchema {
|
|
99
|
+
/** Must be "boolean" for boolean schemas */
|
|
100
|
+
type: "boolean";
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Schema definition for array values with item validation and constraints.
|
|
105
|
+
*
|
|
106
|
+
* @interface ArraySchema
|
|
107
|
+
* @extends BasicSchema
|
|
108
|
+
* @example
|
|
109
|
+
* ```typescript Array Schema
|
|
110
|
+
* import { ArraySchema } from "@intuned/browser/ai";
|
|
111
|
+
* export default async function handler(params, page, context){
|
|
112
|
+
* const tagsSchema: ArraySchema = {
|
|
113
|
+
* type: "array",
|
|
114
|
+
* items: { type: "string" },
|
|
115
|
+
* minItems: 1,
|
|
116
|
+
* maxItems: 10,
|
|
117
|
+
* uniqueItems: true,
|
|
118
|
+
* description: "List of tags"
|
|
119
|
+
* };
|
|
120
|
+
* }
|
|
121
|
+
* ```
|
|
122
|
+
*/
|
|
123
|
+
export interface ArraySchema extends BasicSchema {
|
|
124
|
+
/** Must be "array" for array schemas */
|
|
125
|
+
type: "array";
|
|
126
|
+
/** Schema definition for array items */
|
|
127
|
+
items: JsonSchema | z.ZodSchema;
|
|
128
|
+
/** Maximum number of items allowed */
|
|
129
|
+
maxItems?: number;
|
|
130
|
+
/** Minimum number of items required */
|
|
131
|
+
minItems?: number;
|
|
132
|
+
/** Whether all items must be unique */
|
|
133
|
+
uniqueItems?: boolean;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* Schema definition for object values with property validation and constraints.
|
|
138
|
+
*
|
|
139
|
+
* @interface ObjectSchema
|
|
140
|
+
* @extends BasicSchema
|
|
141
|
+
* @example
|
|
142
|
+
* ```typescript Object Schema
|
|
143
|
+
* import { ObjectSchema } from "@intuned/browser/ai";
|
|
144
|
+
* export default async function handler(params, page, context){
|
|
145
|
+
* const userSchema: ObjectSchema = {
|
|
146
|
+
* type: "object",
|
|
147
|
+
* properties: {
|
|
148
|
+
* name: { type: "string" },
|
|
149
|
+
* email: { type: "string", pattern: "^[^@]+@[^@]+\\.[^@]+$" },
|
|
150
|
+
* age: { type: "integer", minimum: 0 }
|
|
151
|
+
* },
|
|
152
|
+
* required: ["name", "email"],
|
|
153
|
+
* description: "User profile information"
|
|
154
|
+
* };
|
|
155
|
+
* }
|
|
156
|
+
* ```
|
|
157
|
+
*/
|
|
158
|
+
export interface ObjectSchema extends BasicSchema {
|
|
159
|
+
/** Must be "object" for object schemas */
|
|
160
|
+
type: "object";
|
|
161
|
+
/** Schema definitions for object properties */
|
|
162
|
+
properties: Record<string, JsonSchema | z.ZodSchema>;
|
|
163
|
+
/** Array of required property names */
|
|
164
|
+
required?: string[];
|
|
165
|
+
/** Maximum number of properties allowed */
|
|
166
|
+
maxProperties?: number;
|
|
167
|
+
/** Minimum number of properties required */
|
|
168
|
+
minProperties?: number;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* Union type representing all supported JSON schema types.
|
|
173
|
+
* Can be a StringSchema, NumberSchema, BooleanSchema, ArraySchema, or ObjectSchema.
|
|
174
|
+
* Each schema type provides validation constraints for its respective data type.
|
|
175
|
+
*
|
|
176
|
+
* @type JsonSchema
|
|
177
|
+
* @example
|
|
178
|
+
* ```typescript Object Schema
|
|
179
|
+
* import { JsonSchema } from "@intuned/browser/ai";
|
|
180
|
+
* export default async function handler(params, page, context){
|
|
181
|
+
* const schema: JsonSchema = {
|
|
182
|
+
* type: "object",
|
|
183
|
+
* properties: {
|
|
184
|
+
* name: { type: "string" },
|
|
185
|
+
* age: { type: "number" }
|
|
186
|
+
* },
|
|
187
|
+
* required: ["name"]
|
|
188
|
+
* };
|
|
189
|
+
* }
|
|
190
|
+
* ```
|
|
191
|
+
* @example
|
|
192
|
+
* ```typescript Array Schema
|
|
193
|
+
* import { JsonSchema } from "@intuned/browser/ai";
|
|
194
|
+
* export default async function handler(params, page, context){
|
|
195
|
+
* const schema: JsonSchema = {
|
|
196
|
+
* type: "array",
|
|
197
|
+
* items: { type: "string" },
|
|
198
|
+
* minItems: 1
|
|
199
|
+
* };
|
|
200
|
+
* }
|
|
201
|
+
* ```
|
|
202
|
+
*/
|
|
203
|
+
export type JsonSchema =
|
|
204
|
+
| StringSchema
|
|
205
|
+
| NumberSchema
|
|
206
|
+
| BooleanSchema
|
|
207
|
+
| ArraySchema
|
|
208
|
+
| ObjectSchema;
|
|
209
|
+
/**
|
|
210
|
+
* Extract structured data from web pages using AI-powered content analysis.
|
|
211
|
+
* @overload From Page or Locator
|
|
212
|
+
* This function provides intelligent data extraction from web pages using various strategies
|
|
213
|
+
* including HTML parsing, image analysis, and Markdown conversion. It supports extraction
|
|
214
|
+
* from entire pages or specific elements, with built-in caching and retry mechanisms.
|
|
215
|
+
*
|
|
216
|
+
* @param {Object} options - Configuration object containing extraction parameters
|
|
217
|
+
* @param {Page | Locator} options.source - Playwright Page object to extract data from the entire page or Locator object to extract data from a specific element
|
|
218
|
+
* @param {JsonSchema | z.ZodSchema} options.dataSchema - Schema defining the structure of the data to extract. This can be a JsonSchema or a ZodSchema
|
|
219
|
+
* @param {string} [options.strategy="HTML"] - Type of extraction: "HTML", "IMAGE", or "MARKDOWN". Defaults to "HTML"
|
|
220
|
+
* @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context
|
|
221
|
+
* @param {boolean} [options.enableDomMatching=false] - Whether to enable DOM element matching during extraction. Defaults to false. When set to true, all types in the schema must be strings to match with the DOM elements. The extracted results will be matched with the DOM elements and returned, then cached in a smart fashion so that the next time the same data is extracted, the result will be returned from the cache even if the DOM has minor changes.
|
|
222
|
+
* @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true
|
|
223
|
+
* @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
|
|
224
|
+
* @param {SUPPORTED_MODELS} [options.model="claude-3-5-haiku-latest"] - AI model to use for extraction. See [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models. Defaults to "claude-3-5-haiku-latest"
|
|
225
|
+
* @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
|
|
226
|
+
*
|
|
227
|
+
* @returns Promise resolving to the extracted structured data matching the provided schema
|
|
228
|
+
|
|
229
|
+
* @example
|
|
230
|
+
* ```typescript Page source
|
|
231
|
+
* import { extractStructuredData } from '@intuned/browser/ai';
|
|
232
|
+
* export default async function handler(params, page, context){
|
|
233
|
+
* await page.goto("https://books.toscrape.com/")
|
|
234
|
+
* const product = await extractStructuredData({
|
|
235
|
+
* source: page,
|
|
236
|
+
* strategy: "HTML",
|
|
237
|
+
* model: "gpt-4o",
|
|
238
|
+
* dataSchema: {
|
|
239
|
+
* type: "object",
|
|
240
|
+
* properties: {
|
|
241
|
+
* name: { type: "string" },
|
|
242
|
+
* price: { type: "string" },
|
|
243
|
+
* description: { type: "string" },
|
|
244
|
+
* inStock: { type: "boolean" }
|
|
245
|
+
* },
|
|
246
|
+
* required: ["name", "price"]
|
|
247
|
+
* },
|
|
248
|
+
* prompt: "Extract product details from this e-commerce page"
|
|
249
|
+
* });
|
|
250
|
+
* console.log(`Found book: ${product.name} - ${product.price}`);
|
|
251
|
+
* }
|
|
252
|
+
* ```
|
|
253
|
+
*
|
|
254
|
+
* @example
|
|
255
|
+
* ```typescript Locator source
|
|
256
|
+
* import { extractStructuredData } from '@intuned/browser/ai';
|
|
257
|
+
* export default async function handler(params, page, context){
|
|
258
|
+
* await page.goto("https://books.toscrape.com/")
|
|
259
|
+
* const articleContainer = page.locator("article").first()
|
|
260
|
+
* const article = await extractStructuredData({
|
|
261
|
+
* source: articleContainer,
|
|
262
|
+
* strategy: "MARKDOWN",
|
|
263
|
+
* model: "claude-3-5-sonnet-20240620",
|
|
264
|
+
* dataSchema: {
|
|
265
|
+
* type: "object",
|
|
266
|
+
* properties: {
|
|
267
|
+
* title: { type: "string" },
|
|
268
|
+
* author: { type: "string" },
|
|
269
|
+
* publishDate: { type: "string" },
|
|
270
|
+
* content: { type: "string" },
|
|
271
|
+
* },
|
|
272
|
+
* required: ["title"]
|
|
273
|
+
* },
|
|
274
|
+
* maxRetries: 5
|
|
275
|
+
* });
|
|
276
|
+
* console.log(`Found book: ${article.title}`);
|
|
277
|
+
* }
|
|
278
|
+
* ```
|
|
279
|
+
*/
|
|
280
|
+
export declare function extractStructuredData(options: {
|
|
281
|
+
source: Page | Locator;
|
|
282
|
+
dataSchema: JsonSchema | z.ZodSchema;
|
|
283
|
+
prompt?: string;
|
|
284
|
+
strategy?: "IMAGE" | "MARKDOWN" | "HTML";
|
|
285
|
+
enableDomMatching?: boolean;
|
|
286
|
+
enableCache?: boolean;
|
|
287
|
+
maxRetries?: number;
|
|
288
|
+
model?: SUPPORTED_MODELS;
|
|
289
|
+
apiKey?: string;
|
|
290
|
+
}): Promise<any>;
|
|
291
|
+
|
|
292
|
+
/**
|
|
293
|
+
* Extract structured data from content items (text, images) using AI-powered analysis.
|
|
294
|
+
* @overload From Content
|
|
295
|
+
* This overload provides a simplified interface for data extraction from various content types
|
|
296
|
+
* without requiring a page source or extraction strategy. It accepts text content, image buffers,
|
|
297
|
+
* or image URLs and extracts structured data according to the provided schema.
|
|
298
|
+
*
|
|
299
|
+
* @param {Object} options - Configuration object containing extraction parameters
|
|
300
|
+
* @param {ContentItem[] | ContentItem} options.content - Content to extract data from - can be a single content item or array of content items
|
|
301
|
+
* @param {JsonSchema | z.ZodSchema} options.dataSchema - Schema defining the structure of the data to extract. This can be a JsonSchema or a ZodSchema
|
|
302
|
+
* @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context
|
|
303
|
+
* @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true
|
|
304
|
+
* @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
|
|
305
|
+
* @param {SUPPORTED_MODELS} options.model - AI model to use for extraction (e.g., "gpt-4", "claude-3-5-sonnet-latest"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models
|
|
306
|
+
* @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
|
|
307
|
+
*
|
|
308
|
+
* @returns Promise resolving to the extracted structured data matching the provided schema
|
|
309
|
+
*
|
|
310
|
+
* @example
|
|
311
|
+
* ```typescript Text Content
|
|
312
|
+
* import { extractStructuredData, TextContentItem } from '@intuned/browser/ai';
|
|
313
|
+
* export default async function handler(params, page, context){
|
|
314
|
+
* const textContent: TextContentItem = {
|
|
315
|
+
* type: "text",
|
|
316
|
+
* data: "John Doe, age 30, works as a Software Engineer at Tech Corp"
|
|
317
|
+
* };
|
|
318
|
+
*
|
|
319
|
+
* const person = await extractStructuredData({
|
|
320
|
+
* content: textContent,
|
|
321
|
+
* model: "gpt-4o",
|
|
322
|
+
* dataSchema: {
|
|
323
|
+
* type: "object",
|
|
324
|
+
* properties: {
|
|
325
|
+
* name: { type: "string" },
|
|
326
|
+
* age: { type: "number" },
|
|
327
|
+
* occupation: { type: "string" },
|
|
328
|
+
* company: { type: "string" }
|
|
329
|
+
* },
|
|
330
|
+
* required: ["name"]
|
|
331
|
+
* },
|
|
332
|
+
* prompt: "Extract person information from the text"
|
|
333
|
+
* });
|
|
334
|
+
*
|
|
335
|
+
* console.log(`Found person: ${person.name}, ${person.age} years old`);
|
|
336
|
+
* }
|
|
337
|
+
* ```
|
|
338
|
+
*
|
|
339
|
+
* @example
|
|
340
|
+
* ```typescript Multiple Content Items
|
|
341
|
+
* import { extractStructuredData } from '@intuned/browser/ai';
|
|
342
|
+
* export default async function handler(params, page, context){
|
|
343
|
+
* const mixedContent = [
|
|
344
|
+
* { type: "text", data: "Product: iPhone 15" },
|
|
345
|
+
* { type: "image-url", image_type: "jpeg", data: "https://mintcdn.com/intuned-7/asXJUUPBWwDlStUB/logo/light.svg?fit=max&auto=format&n=asXJUUPBWwDlStUB&q=85&s=6525c0b299b3226464eba6afa9b7ebe6" }
|
|
346
|
+
* ];
|
|
347
|
+
*
|
|
348
|
+
*
|
|
349
|
+
* const product = await extractStructuredData({
|
|
350
|
+
* content: mixedContent,
|
|
351
|
+
* model: "claude-3-5-sonnet-latest",
|
|
352
|
+
* dataSchema: {
|
|
353
|
+
* type: "object",
|
|
354
|
+
* properties: {
|
|
355
|
+
* name: { type: "string" },
|
|
356
|
+
* price: { type: "string" },
|
|
357
|
+
* features: { type: "array", items: { type: "string" } }
|
|
358
|
+
* }
|
|
359
|
+
* },
|
|
360
|
+
* maxRetries: 1,
|
|
361
|
+
* enableCache: true
|
|
362
|
+
* });
|
|
363
|
+
* }
|
|
364
|
+
* ```
|
|
365
|
+
*/
|
|
366
|
+
export declare function extractStructuredData(options: {
|
|
367
|
+
content: ContentItem[] | ContentItem;
|
|
368
|
+
dataSchema: JsonSchema | z.ZodSchema;
|
|
369
|
+
prompt?: string;
|
|
370
|
+
maxRetries?: number;
|
|
371
|
+
enableCache?: boolean;
|
|
372
|
+
model: SUPPORTED_MODELS;
|
|
373
|
+
apiKey?: string;
|
|
374
|
+
}): Promise<any>;
|
|
375
|
+
|
|
376
|
+
type SUPPORTED_CLAUDE_MODELS =
|
|
377
|
+
| "claude-3-5-haiku-20241022"
|
|
378
|
+
| "claude-3-5-haiku-latest"
|
|
379
|
+
| "claude-3-5-sonnet-20240620"
|
|
380
|
+
| "claude-3-5-sonnet-20241022"
|
|
381
|
+
| "claude-3-5-sonnet-latest"
|
|
382
|
+
| "claude-3-7-sonnet-20250219"
|
|
383
|
+
| "claude-3-7-sonnet-latest"
|
|
384
|
+
| "claude-3-haiku-20240307"
|
|
385
|
+
| "claude-4-opus-20250514"
|
|
386
|
+
| "claude-4-sonnet-20250514"
|
|
387
|
+
| "claude-opus-4-1"
|
|
388
|
+
| "claude-opus-4-1-20250805"
|
|
389
|
+
| "claude-opus-4-20250514"
|
|
390
|
+
| "claude-sonnet-4-20250514";
|
|
391
|
+
|
|
392
|
+
type SUPPORTED_OPENAI_MODELS =
|
|
393
|
+
| "gpt-3.5-turbo"
|
|
394
|
+
| "gpt-3.5-turbo-0125"
|
|
395
|
+
| "gpt-3.5-turbo-0301"
|
|
396
|
+
| "gpt-3.5-turbo-0613"
|
|
397
|
+
| "gpt-3.5-turbo-1106"
|
|
398
|
+
| "gpt-3.5-turbo-16k"
|
|
399
|
+
| "gpt-3.5-turbo-16k-0613"
|
|
400
|
+
| "gpt-3.5-turbo-instruct"
|
|
401
|
+
| "gpt-3.5-turbo-instruct-0914"
|
|
402
|
+
| "gpt-4"
|
|
403
|
+
| "gpt-4-0314"
|
|
404
|
+
| "gpt-4-0613"
|
|
405
|
+
| "gpt-4-32k"
|
|
406
|
+
| "gpt-4-32k-0314"
|
|
407
|
+
| "gpt-4-32k-0613"
|
|
408
|
+
| "gpt-4-turbo"
|
|
409
|
+
| "gpt-4-turbo-2024-04-09"
|
|
410
|
+
| "gpt-4.1"
|
|
411
|
+
| "gpt-4.1-2025-04-14"
|
|
412
|
+
| "gpt-4.1-mini"
|
|
413
|
+
| "gpt-4.1-mini-2025-04-14"
|
|
414
|
+
| "gpt-4.1-nano"
|
|
415
|
+
| "gpt-4.1-nano-2025-04-14"
|
|
416
|
+
| "gpt-4o"
|
|
417
|
+
| "gpt-4o-2024-05-13"
|
|
418
|
+
| "gpt-4o-2024-08-06"
|
|
419
|
+
| "gpt-4o-2024-11-20"
|
|
420
|
+
| "gpt-4o-mini"
|
|
421
|
+
| "gpt-4o-mini-2024-07-18"
|
|
422
|
+
| "gpt-5"
|
|
423
|
+
| "gpt-5-2025-08-07"
|
|
424
|
+
| "gpt-5-chat"
|
|
425
|
+
| "gpt-5-chat-latest"
|
|
426
|
+
| "gpt-5-mini"
|
|
427
|
+
| "gpt-5-mini-2025-08-07"
|
|
428
|
+
| "gpt-5-nano"
|
|
429
|
+
| "gpt-5-nano-2025-08-07"
|
|
430
|
+
| "o1"
|
|
431
|
+
| "o1-2024-12-17"
|
|
432
|
+
| "o1-mini"
|
|
433
|
+
| "o1-mini-2024-09-12"
|
|
434
|
+
| "o1-pro"
|
|
435
|
+
| "o1-pro-2025-03-19"
|
|
436
|
+
| "o3"
|
|
437
|
+
| "o3-2025-04-16"
|
|
438
|
+
| "o3-deep-research"
|
|
439
|
+
| "o3-deep-research-2025-06-26"
|
|
440
|
+
| "o3-mini"
|
|
441
|
+
| "o3-mini-2025-01-31"
|
|
442
|
+
| "o3-pro"
|
|
443
|
+
| "o3-pro-2025-06-10"
|
|
444
|
+
| "o4-mini"
|
|
445
|
+
| "o4-mini-2025-04-16"
|
|
446
|
+
| "o4-mini-deep-research"
|
|
447
|
+
| "o4-mini-deep-research-2025-06-26";
|
|
448
|
+
/**
|
|
449
|
+
* Union type representing all supported AI models for data extraction.
|
|
450
|
+
* Includes models from both OpenAI and Anthropic.
|
|
451
|
+
*
|
|
452
|
+
* **Supported OpenAI Models:**
|
|
453
|
+
* "gpt-3.5-turbo"
|
|
454
|
+
* "gpt-3.5-turbo-0125"
|
|
455
|
+
* "gpt-3.5-turbo-0301"
|
|
456
|
+
* "gpt-3.5-turbo-0613"
|
|
457
|
+
* "gpt-3.5-turbo-1106"
|
|
458
|
+
* "gpt-3.5-turbo-16k"
|
|
459
|
+
* "gpt-3.5-turbo-16k-0613"
|
|
460
|
+
* "gpt-3.5-turbo-instruct"
|
|
461
|
+
* "gpt-3.5-turbo-instruct-0914"
|
|
462
|
+
* "gpt-4"
|
|
463
|
+
* "gpt-4-0314"
|
|
464
|
+
* "gpt-4-0613"
|
|
465
|
+
* "gpt-4-32k"
|
|
466
|
+
* "gpt-4-32k-0314"
|
|
467
|
+
* "gpt-4-32k-0613"
|
|
468
|
+
* "gpt-4-turbo"
|
|
469
|
+
* "gpt-4-turbo-2024-04-09"
|
|
470
|
+
* "gpt-4.1"
|
|
471
|
+
* "gpt-4.1-2025-04-14"
|
|
472
|
+
* "gpt-4.1-mini"
|
|
473
|
+
* "gpt-4.1-mini-2025-04-14"
|
|
474
|
+
* "gpt-4.1-nano"
|
|
475
|
+
* "gpt-4.1-nano-2025-04-14"
|
|
476
|
+
* "gpt-4o"
|
|
477
|
+
* "gpt-4o-2024-05-13"
|
|
478
|
+
* "gpt-4o-2024-08-06"
|
|
479
|
+
* "gpt-4o-2024-11-20"
|
|
480
|
+
* "gpt-4o-mini"
|
|
481
|
+
* "gpt-4o-mini-2024-07-18"
|
|
482
|
+
* "gpt-5"
|
|
483
|
+
* "gpt-5-2025-08-07"
|
|
484
|
+
* "gpt-5-chat"
|
|
485
|
+
* "gpt-5-chat-latest"
|
|
486
|
+
* "gpt-5-mini"
|
|
487
|
+
* "gpt-5-mini-2025-08-07"
|
|
488
|
+
* "gpt-5-nano"
|
|
489
|
+
* "gpt-5-nano-2025-08-07"
|
|
490
|
+
* "o1"
|
|
491
|
+
* "o1-2024-12-17"
|
|
492
|
+
* "o1-mini"
|
|
493
|
+
* "o1-mini-2024-09-12"
|
|
494
|
+
* "o1-pro"
|
|
495
|
+
* "o1-pro-2025-03-19"
|
|
496
|
+
* "o3"
|
|
497
|
+
* "o3-2025-04-16"
|
|
498
|
+
* "o3-deep-research"
|
|
499
|
+
* "o3-deep-research-2025-06-26"
|
|
500
|
+
* "o3-mini"
|
|
501
|
+
* "o3-mini-2025-01-31"
|
|
502
|
+
* "o3-pro"
|
|
503
|
+
* "o3-pro-2025-06-10"
|
|
504
|
+
* "o4-mini"
|
|
505
|
+
* "o4-mini-2025-04-16"
|
|
506
|
+
* "o4-mini-deep-research"
|
|
507
|
+
* "o4-mini-deep-research-2025-06-26"
|
|
508
|
+
*
|
|
509
|
+
* **Supported Anthropic (Claude) Models:**
|
|
510
|
+
* "claude-3-5-haiku-20241022"
|
|
511
|
+
* "claude-3-5-haiku-latest"
|
|
512
|
+
* "claude-3-5-sonnet-20240620"
|
|
513
|
+
* "claude-3-5-sonnet-20241022"
|
|
514
|
+
* "claude-3-5-sonnet-latest"
|
|
515
|
+
* "claude-3-7-sonnet-20250219"
|
|
516
|
+
* "claude-3-7-sonnet-latest"
|
|
517
|
+
* "claude-3-haiku-20240307"
|
|
518
|
+
* "claude-4-opus-20250514"
|
|
519
|
+
* "claude-4-sonnet-20250514"
|
|
520
|
+
* "claude-opus-4-1"
|
|
521
|
+
* "claude-opus-4-1-20250805"
|
|
522
|
+
* "claude-opus-4-20250514"
|
|
523
|
+
* "claude-sonnet-4-20250514"
|
|
524
|
+
*
|
|
525
|
+
* @type SUPPORTED_MODELS
|
|
526
|
+
*/
|
|
527
|
+
export type SUPPORTED_MODELS =
|
|
528
|
+
| SUPPORTED_CLAUDE_MODELS
|
|
529
|
+
| SUPPORTED_OPENAI_MODELS;
|
|
530
|
+
|
|
531
|
+
/**
|
|
532
|
+
* Uses AI vision to determine if a webpage has finished loading by analyzing a screenshot.
|
|
533
|
+
* Detects loading spinners, blank content, or incomplete page states.
|
|
534
|
+
*
|
|
535
|
+
* @param {Object} input - Input object containing the page to check
|
|
536
|
+
* @param {Page} input.page - The Playwright page to check
|
|
537
|
+
* @param {number} [input.timeoutInMs=10000] - Screenshot timeout in milliseconds. Defaults to 10000
|
|
538
|
+
* @param {SUPPORTED_MODELS} [input.model="gpt-4o-2024-08-06"] - AI model to use for the check. See [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models. Defaults to "gpt-4o-2024-08-06"
|
|
539
|
+
* @param {string} [input.apiKey] - Optional API key for the AI service (if provided, will not be billed to your account)
|
|
540
|
+
* @returns {Promise<boolean>} Promise resolving to true if page is loaded, false if still loading
|
|
541
|
+
* @example
|
|
542
|
+
* ```typescript Check Page Loading
|
|
543
|
+
* import { isPageLoaded } from "@intuned/browser/ai";
|
|
544
|
+
* export default async function handler(params, page, context){
|
|
545
|
+
* // Wait for page to finish loading
|
|
546
|
+
* await page.goto('https://example.com');
|
|
547
|
+
*
|
|
548
|
+
* const pageLoaded = await isPageLoaded({page});
|
|
549
|
+
* if (pageLoaded) {
|
|
550
|
+
* // Continue with scraping or interactions
|
|
551
|
+
* } else {
|
|
552
|
+
* // Wait longer or retry
|
|
553
|
+
* }
|
|
554
|
+
* }
|
|
555
|
+
* ```
|
|
556
|
+
*
|
|
557
|
+
* @example
|
|
558
|
+
* ```typescript Loading Loop
|
|
559
|
+
* import { isPageLoaded } from "@intuned/browser/ai";
|
|
560
|
+
* export default async function handler(params, page, context){
|
|
561
|
+
* // Keep checking until page loads
|
|
562
|
+
* await page.goto("https://example.com");
|
|
563
|
+
* let attempts = 0;
|
|
564
|
+
* while (attempts < 10) {
|
|
565
|
+
* const pageLoaded = await isPageLoaded({
|
|
566
|
+
* page,
|
|
567
|
+
* model: "gpt-4o",
|
|
568
|
+
* timeoutInMs: 5000
|
|
569
|
+
* });
|
|
570
|
+
* if (pageLoaded) break;
|
|
571
|
+
*
|
|
572
|
+
* await page.waitForTimeout(2000);
|
|
573
|
+
* attempts++;
|
|
574
|
+
* }
|
|
575
|
+
* }
|
|
576
|
+
* ```
|
|
577
|
+
*
|
|
578
|
+
*/
|
|
579
|
+
export declare function isPageLoaded(input: {
|
|
580
|
+
page: Page;
|
|
581
|
+
timeoutInMs?: number;
|
|
582
|
+
model?: SUPPORTED_MODELS;
|
|
583
|
+
apiKey?: string;
|
|
584
|
+
}): Promise<boolean>;
|
|
585
|
+
|
|
586
|
+
/**
|
|
587
|
+
* Represents text content for AI extraction.
|
|
588
|
+
* Used when passing text data directly to extractStructuredData without a page source.
|
|
589
|
+
*
|
|
590
|
+
* @interface TextContentItem
|
|
591
|
+
* @property {string} type - The type of the content item, which is always "text"
|
|
592
|
+
* @property {string} data - The text content to extract data from
|
|
593
|
+
*/
|
|
594
|
+
export interface TextContentItem {
|
|
595
|
+
type: "text";
|
|
596
|
+
data: string;
|
|
597
|
+
}
|
|
598
|
+
|
|
599
|
+
/**
|
|
600
|
+
* Represents image content provided as a Buffer for AI extraction.
|
|
601
|
+
* Used when passing image data directly to extractStructuredData without a page source.
|
|
602
|
+
* The image will be analyzed by AI vision models for data extraction.
|
|
603
|
+
*
|
|
604
|
+
* @interface ImageBufferContentItem
|
|
605
|
+
* @property {string} type - The type of the content item, which is always "image-buffer"
|
|
606
|
+
* @property {string} image_type - The image format; must be one of "png", "jpeg", "gif", or "webp"
|
|
607
|
+
* @property {Buffer} data - The Buffer containing the raw image data
|
|
608
|
+
*/
|
|
609
|
+
export interface ImageBufferContentItem {
|
|
610
|
+
type: "image-buffer";
|
|
611
|
+
image_type: "png" | "jpeg" | "gif" | "webp";
|
|
612
|
+
data: Buffer;
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
/**
|
|
616
|
+
* Represents image content provided as a URL for AI extraction.
|
|
617
|
+
* Used when passing image URLs directly to extractStructuredData without a page source.
|
|
618
|
+
* The image will be fetched from the URL and analyzed by AI vision models for data extraction.
|
|
619
|
+
*
|
|
620
|
+
* @interface ImageUrlContentItem
|
|
621
|
+
* @property {string} type - The type of the content item, which is always "image-url"
|
|
622
|
+
* @property {string} image_type - The image format; must be one of "png", "jpeg", "gif", or "webp"
|
|
623
|
+
* @property {string} data - The URL of the image to fetch and analyze
|
|
624
|
+
*/
|
|
625
|
+
export interface ImageUrlContentItem {
|
|
626
|
+
type: "image-url";
|
|
627
|
+
image_type: "png" | "jpeg" | "gif" | "webp";
|
|
628
|
+
data: string;
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
/**
|
|
632
|
+
* Union type representing all content items for AI data extraction.
|
|
633
|
+
* @type ContentItem
|
|
634
|
+
* @property {TextContentItem} type - [TextContentItem](../interfaces/TextContentItem) type. Used when passing text data directly to extractStructuredData without a page source.
|
|
635
|
+
* @property {ImageBufferContentItem} type - [ImageBufferContentItem](../interfaces/ImageBufferContentItem) type. Used when passing image data directly to extractStructuredData without a page source.
|
|
636
|
+
* @property {ImageUrlContentItem} type - [ImageUrlContentItem](../interfaces/ImageUrlContentItem) type. Used when passing image URLs directly to extractStructuredData without a page source.
|
|
637
|
+
*/
|
|
638
|
+
export type ContentItem =
|
|
639
|
+
| TextContentItem
|
|
640
|
+
| ImageBufferContentItem
|
|
641
|
+
| ImageUrlContentItem;
|
package/dist/ai/index.js
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
Object.defineProperty(exports, "extractStructuredData", {
|
|
7
|
+
enumerable: true,
|
|
8
|
+
get: function () {
|
|
9
|
+
return _extractStructuredData.extractStructuredData;
|
|
10
|
+
}
|
|
11
|
+
});
|
|
12
|
+
Object.defineProperty(exports, "isPageLoaded", {
|
|
13
|
+
enumerable: true,
|
|
14
|
+
get: function () {
|
|
15
|
+
return _isPageLoaded.isPageLoaded;
|
|
16
|
+
}
|
|
17
|
+
});
|
|
18
|
+
var _extractStructuredData = require("./extractStructuredData");
|
|
19
|
+
var _isPageLoaded = require("./isPageLoaded");
|