@intuned/browser-dev 2.2.3-unify-sdks.28 → 2.2.3-unify-sdks.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/export.d.ts +268 -135
- package/dist/ai/extractStructuredData.js +117 -28
- package/dist/ai/extractStructuredDataUsingAi.js +4 -4
- package/dist/ai/index.d.ts +268 -135
- package/dist/ai/prompt.js +2 -2
- package/dist/ai/tests/testExtractFromContent.spec.js +372 -0
- package/dist/ai/tests/testExtractStructuredData.spec.js +25 -24
- package/dist/{helpers → ai}/tests/testIsPageLoaded.spec.js +8 -22
- package/dist/ai/validators.js +37 -6
- package/dist/helpers/export.d.ts +357 -254
- package/dist/helpers/gotoUrl.js +11 -9
- package/dist/helpers/index.d.ts +357 -254
- package/dist/helpers/index.js +3 -3
- package/dist/helpers/saveFileToS3.js +6 -2
- package/dist/helpers/scrollToLoadContent.js +4 -4
- package/dist/helpers/tests/testDownloadFile.spec.js +8 -1
- package/dist/helpers/tests/testValidateDataUsingSchema.spec.js +0 -38
- package/dist/helpers/tests/{testWaitForDomSettled.spec.js → testWithDomSettledWait.spec.js} +8 -8
- package/dist/helpers/tests/testWithNetworkIdleWait.spec.js +7 -7
- package/dist/helpers/uploadFileToS3.js +12 -11
- package/dist/helpers/validateDataUsingSchema.js +2 -18
- package/dist/helpers/waitForDomSettled.js +3 -2
- package/dist/helpers/{withNetworkIdleWait.js → withNetworkSettledWait.js} +11 -11
- package/dist/index.d.ts +1 -4
- package/dist/intunedServices/ApiGateway/tests/testApiGateway.spec.js +4 -1
- package/generated-docs/ai/functions/extractStructuredData.mdx +153 -65
- package/generated-docs/ai/functions/isPageLoaded.mdx +6 -60
- package/generated-docs/ai/interfaces/ArraySchema.mdx +2 -2
- package/generated-docs/ai/interfaces/BooleanSchema.mdx +1 -1
- package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +16 -0
- package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +16 -0
- package/generated-docs/ai/interfaces/NumberSchema.mdx +1 -1
- package/generated-docs/ai/interfaces/ObjectSchema.mdx +2 -2
- package/generated-docs/ai/interfaces/StringSchema.mdx +1 -1
- package/generated-docs/ai/interfaces/TextContentItem.mdx +14 -0
- package/generated-docs/ai/type-aliases/JsonSchema.mdx +41 -0
- package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +77 -4
- package/generated-docs/helpers/functions/downloadFile.mdx +1 -1
- package/generated-docs/helpers/functions/extractMarkdown.mdx +4 -4
- package/generated-docs/helpers/functions/filterEmptyValues.mdx +7 -5
- package/generated-docs/helpers/functions/goToUrl.mdx +70 -28
- package/generated-docs/helpers/functions/processDate.mdx +6 -5
- package/generated-docs/helpers/functions/resolveUrl.mdx +107 -25
- package/generated-docs/helpers/functions/saveFileToS3.mdx +28 -48
- package/generated-docs/helpers/functions/scrollToLoadContent.mdx +7 -7
- package/generated-docs/helpers/functions/uploadFileToS3.mdx +23 -9
- package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +7 -7
- package/generated-docs/helpers/functions/{waitForDomSettled.mdx → withDomSettledWait.mdx} +12 -16
- package/generated-docs/helpers/functions/{withNetworkIdle.mdx → withNetworkIdleWait.mdx} +11 -11
- package/generated-docs/helpers/interfaces/Attachment.mdx +19 -12
- package/generated-docs/helpers/interfaces/S3Configs.mdx +21 -8
- package/generated-docs/helpers/type-aliases/AttachmentType.mdx +1 -3
- package/generated-docs/helpers/type-aliases/FileType.mdx +58 -0
- package/generated-docs/helpers/type-aliases/Trigger.mdx +50 -4
- package/package.json +2 -1
- package/dist/intunedServices/ApiGateway/test.spec.js +0 -1
- package/generated-docs/helpers/interfaces/S3UploadOptions.mdx +0 -40
- package/generated-docs/helpers/type-aliases/DataInput.mdx +0 -11
- package/generated-docs/helpers/type-aliases/DataObject.mdx +0 -11
- package/generated-docs/helpers/type-aliases/S3UploadableFile.mdx +0 -10
package/dist/ai/export.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { Locator, Page } from "playwright-core";
|
|
2
|
+
import { z } from "zod";
|
|
2
3
|
|
|
3
4
|
/**
|
|
4
5
|
* Base schema interface that all JSON schema types extend from.
|
|
@@ -19,7 +20,7 @@ export interface BasicSchema {
|
|
|
19
20
|
* @interface StringSchema
|
|
20
21
|
* @extends BasicSchema
|
|
21
22
|
* @example
|
|
22
|
-
* ```typescript
|
|
23
|
+
* ```typescript String Schema
|
|
23
24
|
* const nameSchema: StringSchema = {
|
|
24
25
|
* type: "string",
|
|
25
26
|
* minLength: 2,
|
|
@@ -48,7 +49,7 @@ export interface StringSchema extends BasicSchema {
|
|
|
48
49
|
* @interface NumberSchema
|
|
49
50
|
* @extends BasicSchema
|
|
50
51
|
* @example
|
|
51
|
-
* ```typescript
|
|
52
|
+
* ```typescript Number Schema
|
|
52
53
|
* const ageSchema: NumberSchema = {
|
|
53
54
|
* type: "integer",
|
|
54
55
|
* minimum: 0,
|
|
@@ -78,7 +79,7 @@ export interface NumberSchema extends BasicSchema {
|
|
|
78
79
|
* @interface BooleanSchema
|
|
79
80
|
* @extends BasicSchema
|
|
80
81
|
* @example
|
|
81
|
-
* ```typescript
|
|
82
|
+
* ```typescript Boolean Schema
|
|
82
83
|
* const isActiveSchema: BooleanSchema = {
|
|
83
84
|
* type: "boolean",
|
|
84
85
|
* description: "Whether the user account is active"
|
|
@@ -96,7 +97,7 @@ export interface BooleanSchema extends BasicSchema {
|
|
|
96
97
|
* @interface ArraySchema
|
|
97
98
|
* @extends BasicSchema
|
|
98
99
|
* @example
|
|
99
|
-
* ```typescript
|
|
100
|
+
* ```typescript Array Schema
|
|
100
101
|
* const tagsSchema: ArraySchema = {
|
|
101
102
|
* type: "array",
|
|
102
103
|
* items: { type: "string" },
|
|
@@ -111,7 +112,7 @@ export interface ArraySchema extends BasicSchema {
|
|
|
111
112
|
/** Must be "array" for array schemas */
|
|
112
113
|
type: "array";
|
|
113
114
|
/** Schema definition for array items */
|
|
114
|
-
items: JsonSchema;
|
|
115
|
+
items: JsonSchema | z.ZodSchema;
|
|
115
116
|
/** Maximum number of items allowed */
|
|
116
117
|
maxItems?: number;
|
|
117
118
|
/** Minimum number of items required */
|
|
@@ -126,7 +127,7 @@ export interface ArraySchema extends BasicSchema {
|
|
|
126
127
|
* @interface ObjectSchema
|
|
127
128
|
* @extends BasicSchema
|
|
128
129
|
* @example
|
|
129
|
-
* ```typescript
|
|
130
|
+
* ```typescript Object Schema
|
|
130
131
|
* const userSchema: ObjectSchema = {
|
|
131
132
|
* type: "object",
|
|
132
133
|
* properties: {
|
|
@@ -143,7 +144,7 @@ export interface ObjectSchema extends BasicSchema {
|
|
|
143
144
|
/** Must be "object" for object schemas */
|
|
144
145
|
type: "object";
|
|
145
146
|
/** Schema definitions for object properties */
|
|
146
|
-
properties: Record<string, JsonSchema>;
|
|
147
|
+
properties: Record<string, JsonSchema | z.ZodSchema>;
|
|
147
148
|
/** Array of required property names */
|
|
148
149
|
required?: string[];
|
|
149
150
|
/** Maximum number of properties allowed */
|
|
@@ -153,22 +154,54 @@ export interface ObjectSchema extends BasicSchema {
|
|
|
153
154
|
}
|
|
154
155
|
|
|
155
156
|
/**
|
|
156
|
-
*
|
|
157
|
+
* Union type representing all supported JSON schema types.
|
|
158
|
+
* Can be a StringSchema, NumberSchema, BooleanSchema, ArraySchema, or ObjectSchema.
|
|
159
|
+
* Each schema type provides validation constraints for its respective data type.
|
|
157
160
|
*
|
|
161
|
+
* @type JsonSchema
|
|
162
|
+
* @example
|
|
163
|
+
* ```typescript Object Schema
|
|
164
|
+
* const schema: JsonSchema = {
|
|
165
|
+
* type: "object",
|
|
166
|
+
* properties: {
|
|
167
|
+
* name: { type: "string" },
|
|
168
|
+
* age: { type: "number" }
|
|
169
|
+
* },
|
|
170
|
+
* required: ["name"]
|
|
171
|
+
* };
|
|
172
|
+
* ```
|
|
173
|
+
* @example
|
|
174
|
+
* ```typescript Array Schema
|
|
175
|
+
* const schema: JsonSchema = {
|
|
176
|
+
* type: "array",
|
|
177
|
+
* items: { type: "string" },
|
|
178
|
+
* minItems: 1
|
|
179
|
+
* };
|
|
180
|
+
* ```
|
|
181
|
+
*/
|
|
182
|
+
export type JsonSchema =
|
|
183
|
+
| StringSchema
|
|
184
|
+
| NumberSchema
|
|
185
|
+
| BooleanSchema
|
|
186
|
+
| ArraySchema
|
|
187
|
+
| ObjectSchema;
|
|
188
|
+
/**
|
|
189
|
+
* Extract structured data from web pages using AI-powered content analysis.
|
|
190
|
+
* @overload From Page or Locator
|
|
158
191
|
* This function provides intelligent data extraction from web pages using various strategies
|
|
159
192
|
* including HTML parsing, image analysis, and Markdown conversion. It supports extraction
|
|
160
193
|
* from entire pages or specific elements, with built-in caching and retry mechanisms.
|
|
161
194
|
*
|
|
162
195
|
* @param {Object} options - Configuration object containing extraction parameters
|
|
163
196
|
* @param {Page | Locator} options.source - Playwright Page object to extract data from the entire page or Locator object to extract data from a specific element
|
|
164
|
-
* @param {JsonSchema} options.dataSchema -
|
|
165
|
-
* @param {SUPPORTED_MODELS} [options.model] - AI model to use for extraction
|
|
166
|
-
* @param {string} [options.strategy] - Type of extraction: "HTML", "IMAGE", or "MARKDOWN"
|
|
197
|
+
* @param {JsonSchema | z.ZodSchema} options.dataSchema - JsonSchema defining the structure of the data to extract. This can be a [JsonSchema](../type-aliases/JsonSchema) or ZodSchema
|
|
198
|
+
* @param {SUPPORTED_MODELS} [options.model="claude-3-5-haiku-latest"] - AI model to use for extraction. See [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models. Defaults to "claude-3-5-haiku-latest"
|
|
199
|
+
* @param {string} [options.strategy="HTML"] - Type of extraction: "HTML", "IMAGE", or "MARKDOWN". Defaults to "HTML"
|
|
167
200
|
* @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context
|
|
168
|
-
* @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
|
|
169
|
-
* @param {boolean} [options.enableDomMatching=false] - Whether to
|
|
170
|
-
* @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data.
|
|
171
|
-
* @param {
|
|
201
|
+
* @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
|
|
202
|
+
* @param {boolean} [options.enableDomMatching=false] - Whether to enable DOM element matching during extraction. Defaults to false. When set to true, all types in the schema must be strings to match with the DOM elements. The extracted results will be matched with the DOM elements and returned, then cached in a smart fashion so that the next time the same data is extracted, the result will be returned from the cache even if the DOM has minor changes.
|
|
203
|
+
* @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true
|
|
204
|
+
* @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
|
|
172
205
|
*
|
|
173
206
|
* @returns Promise resolving to the extracted structured data matching the provided schema
|
|
174
207
|
|
|
@@ -176,7 +209,11 @@ export interface ObjectSchema extends BasicSchema {
|
|
|
176
209
|
* ```typescript Extract Product Information from Entire Page
|
|
177
210
|
* import { extractStructuredData } from '@intuned/browser/ai';
|
|
178
211
|
*
|
|
179
|
-
* const
|
|
212
|
+
* const product = await extractStructuredData({
|
|
213
|
+
* source: page,
|
|
214
|
+
* strategy: "HTML",
|
|
215
|
+
* model: "gpt-4o",
|
|
216
|
+
* dataSchema: {
|
|
180
217
|
* type: "object",
|
|
181
218
|
* properties: {
|
|
182
219
|
* name: { type: "string" },
|
|
@@ -185,13 +222,7 @@ export interface ObjectSchema extends BasicSchema {
|
|
|
185
222
|
* inStock: { type: "boolean" }
|
|
186
223
|
* },
|
|
187
224
|
* required: ["name", "price"]
|
|
188
|
-
* }
|
|
189
|
-
*
|
|
190
|
-
* const product = await extractStructuredData({
|
|
191
|
-
* source: page,
|
|
192
|
-
* strategy: "HTML",
|
|
193
|
-
* model: "gpt-4o",
|
|
194
|
-
* dataSchema: productSchema,
|
|
225
|
+
* },
|
|
195
226
|
* prompt: "Extract product details from this e-commerce page"
|
|
196
227
|
* });
|
|
197
228
|
*
|
|
@@ -202,7 +233,12 @@ export interface ObjectSchema extends BasicSchema {
|
|
|
202
233
|
* ```typescript Extract Article Data from Specific Element
|
|
203
234
|
* import { extractStructuredData } from '@intuned/browser/ai';
|
|
204
235
|
*
|
|
205
|
-
* const
|
|
236
|
+
* const articleContainer = page.locator("article.main-content");
|
|
237
|
+
* const article = await extractStructuredData({
|
|
238
|
+
* source: articleContainer,
|
|
239
|
+
* strategy: "MARKDOWN",
|
|
240
|
+
* model: "claude-3",
|
|
241
|
+
* dataSchema: {
|
|
206
242
|
* type: "object",
|
|
207
243
|
* properties: {
|
|
208
244
|
* title: { type: "string" },
|
|
@@ -212,64 +248,107 @@ export interface ObjectSchema extends BasicSchema {
|
|
|
212
248
|
* tags: { type: "array", items: { type: "string" } }
|
|
213
249
|
* },
|
|
214
250
|
* required: ["title", "content"]
|
|
215
|
-
* }
|
|
216
|
-
*
|
|
217
|
-
* const articleContainer = page.locator("article.main-content");
|
|
218
|
-
* const article = await extractStructuredData({
|
|
219
|
-
* source: articleContainer,
|
|
220
|
-
* strategy: "MARKDOWN",
|
|
221
|
-
* model: "claude-3",
|
|
222
|
-
* dataSchema: articleSchema,
|
|
251
|
+
* },
|
|
223
252
|
* maxRetries: 5
|
|
224
253
|
* });
|
|
225
254
|
*
|
|
226
255
|
* console.log(`Article: ${article.title} by ${article.author}`);
|
|
227
256
|
* ```
|
|
257
|
+
*/
|
|
258
|
+
export declare function extractStructuredData(options: {
|
|
259
|
+
source: Page | Locator;
|
|
260
|
+
dataSchema: JsonSchema | z.ZodSchema;
|
|
261
|
+
prompt?: string;
|
|
262
|
+
strategy?: "IMAGE" | "MARKDOWN" | "HTML";
|
|
263
|
+
model?: SUPPORTED_MODELS;
|
|
264
|
+
apiKey?: string;
|
|
265
|
+
enableDomMatching?: boolean;
|
|
266
|
+
enableCache?: boolean;
|
|
267
|
+
maxRetries?: number;
|
|
268
|
+
}): Promise<any>;
|
|
269
|
+
|
|
270
|
+
/**
|
|
271
|
+
* Extract structured data from content items (text, images) using AI-powered analysis.
|
|
272
|
+
* @overload From Content
|
|
273
|
+
* This overload provides a simplified interface for data extraction from various content types
|
|
274
|
+
* without requiring a page source or extraction strategy. It accepts text content, image buffers,
|
|
275
|
+
* or image URLs and extracts structured data according to the provided schema.
|
|
228
276
|
*
|
|
229
|
-
* @
|
|
230
|
-
*
|
|
277
|
+
* @param {Object} options - Configuration object containing extraction parameters
|
|
278
|
+
* @param {TextContentItem | ImageBufferContentItem | ImageUrlContentItem | Array<TextContentItem | ImageBufferContentItem | ImageUrlContentItem>} options.content - Content to extract data from - can be a single content item or array of content items
|
|
279
|
+
* @param {JsonSchema | z.ZodSchema} options.dataSchema - [JsonSchema](../type-aliases/JsonSchema) defining the structure of the data to extract
|
|
280
|
+
* @param {SUPPORTED_MODELS} options.model - AI model to use for extraction (e.g., "gpt-4", "claude-3"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models
|
|
281
|
+
* @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context
|
|
282
|
+
* @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
|
|
283
|
+
* @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true
|
|
284
|
+
* @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
|
|
285
|
+
*
|
|
286
|
+
* @returns Promise resolving to the extracted structured data matching the provided schema
|
|
287
|
+
*
|
|
288
|
+
* @example
|
|
289
|
+
* ```typescript Extract Data from Text Content
|
|
231
290
|
* import { extractStructuredData } from '@intuned/browser/ai';
|
|
232
291
|
*
|
|
233
|
-
* const
|
|
234
|
-
* type: "
|
|
235
|
-
*
|
|
236
|
-
* title: { type: "string" },
|
|
237
|
-
* dataPoints: {
|
|
238
|
-
* type: "array",
|
|
239
|
-
* items: {
|
|
240
|
-
* type: "object",
|
|
241
|
-
* properties: {
|
|
242
|
-
* label: { type: "string" },
|
|
243
|
-
* value: { type: "number" }
|
|
244
|
-
* }
|
|
245
|
-
* }
|
|
246
|
-
* }
|
|
247
|
-
* }
|
|
292
|
+
* const textContent: TextContentItem = {
|
|
293
|
+
* type: "text",
|
|
294
|
+
* data: "John Doe, age 30, works as a Software Engineer at Tech Corp"
|
|
248
295
|
* };
|
|
249
296
|
*
|
|
250
|
-
* const
|
|
251
|
-
*
|
|
252
|
-
* source: chartElement,
|
|
253
|
-
* strategy: "IMAGE",
|
|
297
|
+
* const person = await extractStructuredData({
|
|
298
|
+
* content: textContent,
|
|
254
299
|
* model: "gpt-4o",
|
|
255
|
-
* dataSchema:
|
|
256
|
-
*
|
|
300
|
+
* dataSchema: {
|
|
301
|
+
* type: "object",
|
|
302
|
+
* properties: {
|
|
303
|
+
* name: { type: "string" },
|
|
304
|
+
* age: { type: "number" },
|
|
305
|
+
* occupation: { type: "string" },
|
|
306
|
+
* company: { type: "string" }
|
|
307
|
+
* },
|
|
308
|
+
* required: ["name"]
|
|
309
|
+
* },
|
|
310
|
+
* prompt: "Extract person information from the text"
|
|
257
311
|
* });
|
|
258
312
|
*
|
|
259
|
-
* console.log(`
|
|
260
|
-
*
|
|
261
|
-
*
|
|
313
|
+
* console.log(`Found person: ${person.name}, ${person.age} years old`);
|
|
314
|
+
* ```
|
|
315
|
+
*
|
|
316
|
+
* @example
|
|
317
|
+
* ```typescript Extract Data from Multiple Content Items
|
|
318
|
+
* import { extractStructuredData } from '@intuned/browser/ai';
|
|
319
|
+
*
|
|
320
|
+
* const mixedContent = [
|
|
321
|
+
* { type: "text", data: "Product: iPhone 15" },
|
|
322
|
+
* { type: "image-url", image_type: "jpeg", data: "https://mintcdn.com/intuned-7/asXJUUPBWwDlStUB/logo/light.svg?fit=max&auto=format&n=asXJUUPBWwDlStUB&q=85&s=6525c0b299b3226464eba6afa9b7ebe6" }
|
|
323
|
+
* ];
|
|
324
|
+
*
|
|
325
|
+
*
|
|
326
|
+
* const product = await extractStructuredData({
|
|
327
|
+
* content: mixedContent,
|
|
328
|
+
* model: "claude-3",
|
|
329
|
+
* dataSchema: {
|
|
330
|
+
* type: "object",
|
|
331
|
+
* properties: {
|
|
332
|
+
* name: { type: "string" },
|
|
333
|
+
* price: { type: "string" },
|
|
334
|
+
* features: { type: "array", items: { type: "string" } }
|
|
335
|
+
* }
|
|
336
|
+
* },
|
|
337
|
+
* maxRetries: 1,
|
|
338
|
+
* enableCache: true
|
|
262
339
|
* });
|
|
263
340
|
* ```
|
|
264
341
|
*/
|
|
265
342
|
export declare function extractStructuredData(options: {
|
|
266
|
-
|
|
267
|
-
|
|
343
|
+
content:
|
|
344
|
+
| (TextContentItem | ImageBufferContentItem | ImageUrlContentItem)[]
|
|
345
|
+
| TextContentItem
|
|
346
|
+
| ImageBufferContentItem
|
|
347
|
+
| ImageUrlContentItem;
|
|
348
|
+
dataSchema: JsonSchema | z.ZodSchema;
|
|
268
349
|
prompt?: string;
|
|
269
|
-
|
|
270
|
-
model?: SUPPORTED_MODELS;
|
|
350
|
+
model: SUPPORTED_MODELS;
|
|
271
351
|
apiKey?: string;
|
|
272
|
-
enableDomMatching?: boolean;
|
|
273
352
|
enableCache?: boolean;
|
|
274
353
|
maxRetries?: number;
|
|
275
354
|
}): Promise<any>;
|
|
@@ -347,82 +426,98 @@ type SUPPORTED_OPENAI_MODELS =
|
|
|
347
426
|
| "o4-mini-deep-research"
|
|
348
427
|
| "o4-mini-deep-research-2025-06-26";
|
|
349
428
|
/**
|
|
350
|
-
*
|
|
351
|
-
*
|
|
352
|
-
* The models are used in the extraction strategies to process and analyze the content of web pages or elements.
|
|
353
|
-
* @type SUPPORTED_MODELS
|
|
354
|
-
*/
|
|
355
|
-
type SUPPORTED_MODELS = SUPPORTED_CLAUDE_MODELS | SUPPORTED_OPENAI_MODELS;
|
|
356
|
-
|
|
357
|
-
/**
|
|
358
|
-
* Represents a JSON Schema definition for validating data structures.
|
|
359
|
-
* Supports various schema types including string, number, boolean, array, and object schemas
|
|
360
|
-
* with their respective validation rules and constraints.
|
|
429
|
+
* Union type representing all supported AI models for data extraction.
|
|
430
|
+
* Includes models from both OpenAI and Anthropic.
|
|
361
431
|
*
|
|
362
|
-
*
|
|
363
|
-
* -
|
|
364
|
-
* -
|
|
365
|
-
* -
|
|
366
|
-
* -
|
|
367
|
-
* -
|
|
368
|
-
*
|
|
369
|
-
*
|
|
370
|
-
*
|
|
371
|
-
*
|
|
372
|
-
*
|
|
373
|
-
*
|
|
374
|
-
*
|
|
375
|
-
*
|
|
376
|
-
*
|
|
377
|
-
*
|
|
378
|
-
*
|
|
379
|
-
*
|
|
380
|
-
*
|
|
381
|
-
*
|
|
382
|
-
*
|
|
383
|
-
*
|
|
384
|
-
*
|
|
385
|
-
*
|
|
386
|
-
*
|
|
387
|
-
*
|
|
388
|
-
*
|
|
432
|
+
* **Supported OpenAI Models:**
|
|
433
|
+
* "gpt-3.5-turbo"
|
|
434
|
+
* "gpt-3.5-turbo-0125"
|
|
435
|
+
* "gpt-3.5-turbo-0301"
|
|
436
|
+
* "gpt-3.5-turbo-0613"
|
|
437
|
+
* "gpt-3.5-turbo-1106"
|
|
438
|
+
* "gpt-3.5-turbo-16k"
|
|
439
|
+
* "gpt-3.5-turbo-16k-0613"
|
|
440
|
+
* "gpt-3.5-turbo-instruct"
|
|
441
|
+
* "gpt-3.5-turbo-instruct-0914"
|
|
442
|
+
* "gpt-4"
|
|
443
|
+
* "gpt-4-0314"
|
|
444
|
+
* "gpt-4-0613"
|
|
445
|
+
* "gpt-4-32k"
|
|
446
|
+
* "gpt-4-32k-0314"
|
|
447
|
+
* "gpt-4-32k-0613"
|
|
448
|
+
* "gpt-4-turbo"
|
|
449
|
+
* "gpt-4-turbo-2024-04-09"
|
|
450
|
+
* "gpt-4.1"
|
|
451
|
+
* "gpt-4.1-2025-04-14"
|
|
452
|
+
* "gpt-4.1-mini"
|
|
453
|
+
* "gpt-4.1-mini-2025-04-14"
|
|
454
|
+
* "gpt-4.1-nano"
|
|
455
|
+
* "gpt-4.1-nano-2025-04-14"
|
|
456
|
+
* "gpt-4o"
|
|
457
|
+
* "gpt-4o-2024-05-13"
|
|
458
|
+
* "gpt-4o-2024-08-06"
|
|
459
|
+
* "gpt-4o-2024-11-20"
|
|
460
|
+
* "gpt-4o-mini"
|
|
461
|
+
* "gpt-4o-mini-2024-07-18"
|
|
462
|
+
* "gpt-5"
|
|
463
|
+
* "gpt-5-2025-08-07"
|
|
464
|
+
* "gpt-5-chat"
|
|
465
|
+
* "gpt-5-chat-latest"
|
|
466
|
+
* "gpt-5-mini"
|
|
467
|
+
* "gpt-5-mini-2025-08-07"
|
|
468
|
+
* "gpt-5-nano"
|
|
469
|
+
* "gpt-5-nano-2025-08-07"
|
|
470
|
+
* "o1"
|
|
471
|
+
* "o1-2024-12-17"
|
|
472
|
+
* "o1-mini"
|
|
473
|
+
* "o1-mini-2024-09-12"
|
|
474
|
+
* "o1-pro"
|
|
475
|
+
* "o1-pro-2025-03-19"
|
|
476
|
+
* "o3"
|
|
477
|
+
* "o3-2025-04-16"
|
|
478
|
+
* "o3-deep-research"
|
|
479
|
+
* "o3-deep-research-2025-06-26"
|
|
480
|
+
* "o3-mini"
|
|
481
|
+
* "o3-mini-2025-01-31"
|
|
482
|
+
* "o3-pro"
|
|
483
|
+
* "o3-pro-2025-06-10"
|
|
484
|
+
* "o4-mini"
|
|
485
|
+
* "o4-mini-2025-04-16"
|
|
486
|
+
* "o4-mini-deep-research"
|
|
487
|
+
* "o4-mini-deep-research-2025-06-26"
|
|
389
488
|
*
|
|
390
|
-
*
|
|
391
|
-
*
|
|
392
|
-
*
|
|
393
|
-
*
|
|
394
|
-
*
|
|
395
|
-
*
|
|
396
|
-
*
|
|
397
|
-
*
|
|
398
|
-
*
|
|
399
|
-
*
|
|
400
|
-
*
|
|
401
|
-
*
|
|
489
|
+
* **Supported Anthropic (Claude) Models:**
|
|
490
|
+
* "claude-3-5-haiku-20241022"
|
|
491
|
+
* "claude-3-5-haiku-latest"
|
|
492
|
+
* "claude-3-5-sonnet-20240620"
|
|
493
|
+
* "claude-3-5-sonnet-20241022"
|
|
494
|
+
* "claude-3-5-sonnet-latest"
|
|
495
|
+
* "claude-3-7-sonnet-20250219"
|
|
496
|
+
* "claude-3-7-sonnet-latest"
|
|
497
|
+
* "claude-3-haiku-20240307"
|
|
498
|
+
* "claude-4-opus-20250514"
|
|
499
|
+
* "claude-4-sonnet-20250514"
|
|
500
|
+
* "claude-opus-4-1"
|
|
501
|
+
* "claude-opus-4-1-20250805"
|
|
502
|
+
* "claude-opus-4-20250514"
|
|
503
|
+
* "claude-sonnet-4-20250514"
|
|
402
504
|
*
|
|
403
|
-
* @
|
|
404
|
-
* ```typescript Object Schema
|
|
405
|
-
* const objectSchema: JsonSchema = {
|
|
406
|
-
* type: "object",
|
|
407
|
-
* properties: {
|
|
408
|
-
* name: { type: "string" },
|
|
409
|
-
* age: { type: "number", minimum: 0 },
|
|
410
|
-
* email: { type: "string", pattern: "^[^@]+@[^@]+\\.[^@]+$" }
|
|
411
|
-
* },
|
|
412
|
-
* required: ["name", "email"]
|
|
413
|
-
* };
|
|
414
|
-
* ```
|
|
505
|
+
* @type SUPPORTED_MODELS
|
|
415
506
|
*/
|
|
507
|
+
export type SUPPORTED_MODELS =
|
|
508
|
+
| SUPPORTED_CLAUDE_MODELS
|
|
509
|
+
| SUPPORTED_OPENAI_MODELS;
|
|
416
510
|
|
|
417
511
|
/**
|
|
418
512
|
* Uses AI vision to determine if a webpage has finished loading by analyzing a screenshot.
|
|
419
513
|
* Detects loading spinners, blank content, or incomplete page states.
|
|
514
|
+
*
|
|
420
515
|
* @param {Object} input - Input object containing the page to check
|
|
421
516
|
* @param {Page} input.page - The Playwright page to check
|
|
422
|
-
* @param {number} [input.timeoutInMs=10000] - Screenshot timeout in milliseconds
|
|
423
|
-
* @param {SUPPORTED_MODELS} [input.model="gpt-4o-2024-08-06"] - [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS)
|
|
424
|
-
* @param {string} [input.apiKey] - Optional API key for the AI service
|
|
425
|
-
* @returns {Promise
|
|
517
|
+
* @param {number} [input.timeoutInMs=10000] - Screenshot timeout in milliseconds. Defaults to 10000
|
|
518
|
+
* @param {SUPPORTED_MODELS} [input.model="gpt-4o-2024-08-06"] - AI model to use for the check. See [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models. Defaults to "gpt-4o-2024-08-06"
|
|
519
|
+
* @param {string} [input.apiKey] - Optional API key for the AI service (if provided, will not be billed to your account)
|
|
520
|
+
* @returns {Promise<boolean>} Promise resolving to true if page is loaded, false if still loading
|
|
426
521
|
* @example
|
|
427
522
|
* ```typescript Check Page Loading
|
|
428
523
|
* import { isPageLoaded } from "@intuned/browser/ai";
|
|
@@ -465,9 +560,47 @@ export declare function isPageLoaded(input: {
|
|
|
465
560
|
apiKey?: string;
|
|
466
561
|
}): Promise<boolean>;
|
|
467
562
|
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
563
|
+
/**
|
|
564
|
+
* Represents text content for AI extraction.
|
|
565
|
+
* Used when passing text data directly to extractStructuredData without a page source.
|
|
566
|
+
*
|
|
567
|
+
* @interface TextContentItem
|
|
568
|
+
* @property {string} type - The type of the content item, which is always "text"
|
|
569
|
+
* @property {string} data - The text content to extract data from
|
|
570
|
+
*/
|
|
571
|
+
export interface TextContentItem {
|
|
572
|
+
type: "text";
|
|
573
|
+
data: string;
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
/**
|
|
577
|
+
* Represents image content provided as a Buffer for AI extraction.
|
|
578
|
+
* Used when passing image data directly to extractStructuredData without a page source.
|
|
579
|
+
* The image will be analyzed by AI vision models for data extraction.
|
|
580
|
+
*
|
|
581
|
+
* @interface ImageBufferContentItem
|
|
582
|
+
* @property {string} type - The type of the content item, which is always "image-buffer"
|
|
583
|
+
* @property {string} image_type - The image format (e.g., "png", "jpeg", "gif", "webp")
|
|
584
|
+
* @property {Buffer} data - The Buffer containing the raw image data
|
|
585
|
+
*/
|
|
586
|
+
export interface ImageBufferContentItem {
|
|
587
|
+
type: "image-buffer";
|
|
588
|
+
image_type: "png" | "jpeg" | "gif" | "webp";
|
|
589
|
+
data: Buffer;
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
/**
|
|
593
|
+
* Represents image content provided as a URL for AI extraction.
|
|
594
|
+
* Used when passing image URLs directly to extractStructuredData without a page source.
|
|
595
|
+
* The image will be fetched from the URL and analyzed by AI vision models for data extraction.
|
|
596
|
+
*
|
|
597
|
+
* @interface ImageUrlContentItem
|
|
598
|
+
* @property {string} type - The type of the content item, which is always "image-url"
|
|
599
|
+
* @property {string} image_type - The image format (e.g., "png", "jpeg", "gif", "webp")
|
|
600
|
+
* @property {string} data - The URL of the image to fetch and analyze
|
|
601
|
+
*/
|
|
602
|
+
export interface ImageUrlContentItem {
|
|
603
|
+
type: "image-url";
|
|
604
|
+
image_type: "png" | "jpeg" | "gif" | "webp";
|
|
605
|
+
data: string;
|
|
606
|
+
}
|