@intuned/browser-dev 0.1.8-dev.0 → 0.1.10-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +85 -143
  2. package/dist/ai/export.d.ts +291 -143
  3. package/dist/ai/extractStructuredData.js +21 -27
  4. package/dist/ai/extractStructuredDataUsingAi.js +24 -1
  5. package/dist/ai/index.d.ts +291 -143
  6. package/dist/ai/tests/testCreateMatchesMapping.spec.js +216 -0
  7. package/dist/ai/tests/testExtractStructuredData.spec.js +348 -2
  8. package/dist/ai/tests/testExtractStructuredDataDomMatchingIframes.spec.js +459 -0
  9. package/dist/ai/tests/testExtractStructuredDataUnit.spec.js +375 -0
  10. package/dist/ai/tests/testMatching.spec.js +342 -0
  11. package/dist/ai/tests/testValidateMatchesMapping.spec.js +265 -0
  12. package/dist/common/Logger/index.js +2 -2
  13. package/dist/common/extendedTest.js +38 -30
  14. package/dist/common/frame_utils/frameTree.js +116 -0
  15. package/dist/common/frame_utils/getContentWithNestedIframes.js +13 -0
  16. package/dist/common/frame_utils/index.js +95 -0
  17. package/dist/common/frame_utils/stitchIframe.js +105 -0
  18. package/dist/{helpers → common}/frame_utils/tests/testFindAllIframes.spec.js +24 -15
  19. package/dist/common/frame_utils/tests/testGetContentWithNestedIframes.spec.js +241 -0
  20. package/dist/common/frame_utils/utils.js +91 -0
  21. package/dist/common/getSimplifiedHtml.js +20 -20
  22. package/dist/common/matching/matching.js +91 -16
  23. package/dist/common/tests/matching.test.js +225 -0
  24. package/dist/common/tests/testGetSimplifiedHtml.spec.js +324 -0
  25. package/dist/helpers/export.d.ts +702 -575
  26. package/dist/helpers/extractMarkdown.js +16 -7
  27. package/dist/helpers/index.d.ts +702 -575
  28. package/dist/helpers/tests/testExtractMarkdown.spec.js +29 -0
  29. package/dist/helpers/waitForDomSettled.js +4 -4
  30. package/dist/helpers/withNetworkSettledWait.js +2 -7
  31. package/dist/optimized-extractors/export.d.ts +17 -18
  32. package/dist/optimized-extractors/index.d.ts +17 -18
  33. package/dist/types/intuned-runtime.d.ts +6 -32
  34. package/how-to-generate-docs.md +40 -28
  35. package/package.json +2 -2
  36. package/dist/helpers/frame_utils/constants.js +0 -8
  37. package/dist/helpers/frame_utils/findAllIframes.js +0 -82
  38. package/dist/helpers/frame_utils/index.js +0 -44
  39. /package/dist/{helpers → common}/frame_utils/checkFrameAllowsAsyncScripts.js +0 -0
  40. /package/dist/{helpers → common}/frame_utils/getContainerFrame.js +0 -0
@@ -177,6 +177,10 @@ export interface ObjectSchema extends BasicSchema {
177
177
  * @example
178
178
  * ```typescript Object Schema
179
179
  * import { JsonSchema } from "@intuned/browser/ai";
180
+ * import { BrowserContext, Page } from "playwright";
181
+ *
182
+ * interface Params {}
183
+ *
180
184
  * export default async function handler(params, page, context){
181
185
  * const schema: JsonSchema = {
182
186
  * type: "object",
@@ -207,73 +211,122 @@ export type JsonSchema =
207
211
  | ArraySchema
208
212
  | ObjectSchema;
209
213
  /**
210
- * Extract structured data from web pages using AI-powered content analysis.
211
- * @overload From Page or Locator
214
+ * Extracts structured data from web pages using AI-powered content analysis.
215
+ *
212
216
  * This function provides intelligent data extraction from web pages using various strategies
213
- * including HTML parsing, image analysis, and Markdown conversion. It supports extraction
214
- * from entire pages or specific elements, with built-in caching and retry mechanisms.
217
+ * including HTML parsing, image analysis, and Markdown conversion, or directly from text or image content.
218
+ * It supports extraction from entire pages or specific elements, with built-in caching and retry mechanisms.
219
+ *
220
+ * @overload Extract From Page or Locator
221
+ *
222
+ * Extract data from web pages or specific elements using HTML, IMAGE, or MARKDOWN strategies with DOM matching support.
223
+ *
224
+ * ## Features and limitations
225
+ *
226
+ * **Features:**
227
+ * - **Smart caching:** Hashes inputs and uses [KV Cache](https://docs.intunedhq.com/docs/01-learn/recipes/kv-cache) for persistent storage
228
+ * - **DOM matching:** With `enableDomMatching=true`, values match DOM elements for smart caching
229
+ * - **Multiple strategies:** HTML, IMAGE, or MARKDOWN based on content type
230
+ * - **Flexible models:** Use any up-to-date model from Anthropic, OpenAI, or Google based on your needs
231
+ *
232
+ * **Limitations:**
233
+ * - **Model variability:** Quality varies by model—experiment to find the best fit
234
+ * - **DOM complexity:** Dynamic structures can affect caching and matching
235
+ * - **IMAGE strategy constraints:** Can't capture truncated or off-screen content
236
+ * - **Schema design:** Complex schemas may reduce accuracy
215
237
  *
216
238
  * @param {Object} options - Configuration object containing extraction parameters
217
- * @param {Page | Locator} options.source - Playwright Page object to extract data from the entire page or Locator object to extract data from a specific element
218
- * @param {JsonSchema | z.ZodSchema} options.dataSchema - JsonSchema defining the structure of the data to extract. This can be a JsonSchema or ZodSchema
219
- * @param {string} [options.strategy="HTML"] - Type of extraction: "HTML", "IMAGE", or "MARKDOWN". Defaults to "HTML"
220
- * @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context
221
- * @param {boolean} [options.enableDomMatching=false] - Whether to enable DOM element matching during extraction. Defaults to false. When set to true, all types in the schema must be strings to match with the DOM elements. The extracted results will be matched with the DOM elements and returned, then cached in a smart fashion so that the next time the same data is extracted, the result will be returned from the cache even if the DOM has minor changes.
222
- * @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true
223
- * @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
224
- * @param {string} [options.model="claude-haiku-4-5-20251001"] - AI model to use for extraction. Defaults to "claude-haiku-4-5-20251001"
225
- * @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
226
- *
227
- * @returns Promise resolving to the extracted structured data matching the provided schema
228
-
239
+ * @param {Page | Locator} options.source - Playwright Page object to extract data from the entire page or Locator object to extract data from a specific element.
240
+ * @param {JsonSchema | z.ZodSchema} options.dataSchema - Schema defining the structure of the data to extract. Can be a JSON Schema object or a Zod schema.
241
+ * @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context. Defaults to undefined.
242
+ * @param {("HTML"|"IMAGE"|"MARKDOWN")} [options.strategy="HTML"] - Type of extraction strategy:
243
+ * - **"HTML"** (default) - Best for text-heavy pages with structured content
244
+ * - **"IMAGE"** - Best for visual content, charts, or complex layouts
245
+ * - **"MARKDOWN"** - Best for article-style content with semantic structure
246
+ * @param {boolean} [options.enableDomMatching=false] - Whether to enable DOM element matching during extraction. You must enable cache for this to work. When enabled, extraction results are mapped to their corresponding DOM elements and returned with matched results. These results are intelligently cached, allowing subsequent extractions with minor DOM changes to utilize the cached data for improved performance. Defaults to false.
247
+ * @param {boolean} [options.enableCache=true] - Whether to enable caching of extraction results. Defaults to true.
248
+ * @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3.
249
+ * @param {string} [options.model="claude-haiku-4-5-20251001"] - AI model to use for extraction. Defaults to "claude-haiku-4-5-20251001".
250
+ * @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account). Defaults to undefined.
251
+ *
252
+ * @returns {Promise<any>} The extracted structured data conforming to the provided schema.
253
+ *
229
254
  * @example
230
- * ```typescript Page source
255
+ * ```typescript Extract book details
231
256
  * import { extractStructuredData } from '@intuned/browser/ai';
232
- * export default async function handler(params, page, context){
233
- * await page.goto("https://books.toscrape.com/")
234
- * const product = await extractStructuredData({
235
- * source: page,
236
- * strategy: "HTML",
237
- * model: "claude-haiku-4-5-20251001",
238
- * dataSchema: {
239
- * type: "object",
240
- * properties: {
241
- * name: { type: "string" },
242
- * price: { type: "string" },
243
- * description: { type: "string" },
244
- * inStock: { type: "boolean" }
257
+ * import { BrowserContext, Page } from "playwright";
258
+ *
259
+ * interface Params {}
260
+ *
261
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
262
+ * await page.goto('https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html');
263
+ * // This will extract the book details from the page, using the HTML strategy with the gpt-4o model.
264
+ * // The dataSchema is a JSON Schema object that defines the structure of the data to extract.
265
+ * // You can also use a Zod schema instead of a JSON Schema object.
266
+ * const book = await extractStructuredData({
267
+ * source: page,
268
+ * strategy: "HTML", // The HTML strategy is the default strategy and will be used if no strategy is provided.
269
+ * model: "gpt-4o",
270
+ * dataSchema: {
271
+ * type: "object",
272
+ * properties: {
273
+ * name: { type: "string" },
274
+ * price: { type: "string" },
275
+ * description: { type: "string" },
276
+ * inStock: { type: "boolean" },
277
+ * rating: { type: "string" }
278
+ * },
279
+ * required: ["name", "price"]
245
280
  * },
246
- * required: ["name", "price"]
247
- * },
248
- * prompt: "Extract product details from this e page"
249
- * });
250
- * console.log(`Found book: ${product.name} - ${product.price}`);
281
+ * prompt: "Extract book details from this page",
282
+ * enableCache: true, // Because this is true, the first call invokes the AI; later calls return cached results as long as the DOM is unchanged.
283
+ * enableDomMatching: true, // Because this is true, the results are returned mapped to their DOM elements. You MUST enable cache for this to work.
284
+ * maxRetries: 3
285
+ * });
286
+ *
287
+ * console.log(`Found book: ${book.name} - ${book.price}`);
251
288
  * }
252
289
  * ```
253
290
  *
254
291
  * @example
255
- * ```typescript Locator source
292
+ * ```typescript Extract all book listings
256
293
  * import { extractStructuredData } from '@intuned/browser/ai';
257
- * export default async function handler(params, page, context){
258
- * await page.goto("https://books.toscrape.com/")
259
- * const articleContainer = page.locator("article").first()
260
- * const article = await extractStructuredData({
261
- * source: articleContainer,
262
- * strategy: "MARKDOWN",
263
- * model: "claude-3-7-sonnet-latest",
264
- * dataSchema: {
265
- * type: "object",
266
- * properties: {
267
- * title: { type: "string" },
268
- * author: { type: "string" },
269
- * publishDate: { type: "string" },
270
- * content: { type: "string" },
271
- * },
272
- * required: ["title"]
273
- * },
274
- * maxRetries: 5
275
- * });
276
- * console.log(`Found book: ${article.title}`);
294
+ * import { BrowserContext, Page } from "playwright";
295
+ *
296
+ * interface Params {}
297
+ *
298
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
299
+ * await page.goto('https://books.toscrape.com/');
300
+ * // This will extract all the book listings from the page, using the HTML strategy with the claude-3-7-sonnet-latest model.
301
+ * // The dataSchema is a JSON Schema object that defines the structure of the data to extract.
302
+ * // You can also use a Zod schema instead of a JSON Schema object.
303
+ * const books = await extractStructuredData({
304
+ * source: page,
305
+ * strategy: "HTML",
306
+ * model: "claude-3-7-sonnet-latest",
307
+ * dataSchema: {
308
+ * type: "object",
309
+ * properties: {
310
+ * products: {
311
+ * type: "array",
312
+ * items: {
313
+ * type: "object",
314
+ * properties: {
315
+ * title: { type: "string" },
316
+ * price: { type: "string" },
317
+ * availability: { type: "string" }
318
+ * }
319
+ * }
320
+ * }
321
+ * }
322
+ * },
323
+ * prompt: "Extract all book listings",
324
+ * enableCache: false, // In this example, we don't want to cache the extracted data, we want to extract the data every time.
325
+ * });
326
+ *
327
+ * for (const book of books.products) {
328
+ * console.log(`${book.title}: ${book.price}`);
329
+ * }
277
330
  * }
278
331
  * ```
279
332
  */
@@ -290,76 +343,110 @@ export declare function extractStructuredData(options: {
290
343
  }): Promise<any>;
291
344
 
292
345
  /**
293
- * Extract structured data from content items (text, images) using AI-powered analysis.
294
- * @overload From Content
295
- * This overload provides a simplified interface for data extraction from various content types
296
- * without requiring a page source or extraction strategy. It accepts text content, image buffers,
297
- * or image URLs and extracts structured data according to the provided schema.
346
+ * Extracts structured data from web pages using AI-powered content analysis.
347
+ *
348
+ * This function provides intelligent data extraction from web pages using various strategies
349
+ * including HTML parsing, image analysis, and Markdown conversion, or directly from text or image content.
350
+ * It supports extraction from entire pages or specific elements, with built-in caching and retry mechanisms.
351
+ *
352
+ * @overload Extract From Content
353
+ *
354
+ * Extract data from text, image buffers, or image URLs without requiring a page source.
355
+ *
356
+ * ## Features and limitations
357
+ *
358
+ * **Features:**
359
+ * - **Smart caching:** Hashes content and uses [KV Cache](https://docs.intunedhq.com/docs/01-learn/recipes/kv-cache) for persistent storage
360
+ * - **Multiple content items:** Combine text, images (buffer or URL) for comprehensive extraction
361
+ * - **Flexible models:** Use any up-to-date model from Anthropic, OpenAI, or Google based on your needs
362
+ *
363
+ * **Limitations:**
364
+ * - **Model variability:** Quality varies by model—experiment to find the best fit
365
+ * - **Schema design:** Complex schemas may reduce accuracy
366
+ * - **Content quality:** Requires meaningful, contextual content for accurate extraction—sparse or ambiguous content produces poor results
298
367
  *
299
368
  * @param {Object} options - Configuration object containing extraction parameters
300
- * @param {ContentItem[] | ContentItem} options.content - Content to extract data from - can be a single content item or array of content items
301
- * @param {JsonSchema | z.ZodSchema} options.dataSchema - JsonSchema defining the structure of the data to extract
302
- * @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context
303
- * @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true
304
- * @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
305
- * @param {string} options.model - AI model to use for extraction (e.g., "gpt-4", "claude-3"). Defaults to "claude-haiku-4-5-20251001"
306
- * @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
369
+ * @param {ContentItem[] | ContentItem} options.content - Content to extract data from - can be a single content item or array of [ContentItem](../type-references/ContentItem).
370
+ * @param {JsonSchema | z.ZodSchema} options.dataSchema - Schema defining the expected structure of the extracted data. Can be a JSON Schema object or a Zod schema.
371
+ * @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context. Defaults to undefined.
372
+ * @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3.
373
+ * @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true.
374
+ * @param {string} [options.model="claude-haiku-4-5-20251001"] - AI model to use for extraction. Defaults to "claude-haiku-4-5-20251001".
375
+ * @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account). Defaults to undefined.
307
376
  *
308
- * @returns Promise resolving to the extracted structured data matching the provided schema
377
+ * @returns {Promise<any>} The extracted structured data conforming to the provided schema.
309
378
  *
310
379
  * @example
311
- * ```typescript Text Content
312
- * import { extractStructuredData } from '@intuned/browser/ai';
313
- * export default async function handler(params, page, context){
314
- * const textContent: TextContentItem = {
315
- * type: "text",
316
- * data: "John Doe, age 30, works as a Software Engineer at Tech Corp"
317
- * };
380
+ * ```typescript Basic Text Content Extraction
381
+ * import { extractStructuredData, TextContentItem } from '@intuned/browser/ai';
382
+ * import { BrowserContext, Page } from "playwright";
318
383
  *
319
- * const person = await extractStructuredData({
320
- * content: textContent,
321
- * model: "claude-haiku-4-5-20251001",
322
- * dataSchema: {
323
- * type: "object",
324
- * properties: {
325
- * name: { type: "string" },
326
- * age: { type: "number" },
327
- * occupation: { type: "string" },
328
- * company: { type: "string" }
329
- * },
330
- * required: ["name"]
331
- * },
332
- * prompt: "Extract person information from the text"
333
- * });
384
+ * interface Params {}
385
+ *
386
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
387
+ * // This will extract the person information from the text, using the gpt-4o model.
388
+ * const textContent: TextContentItem = {
389
+ * type: "text",
390
+ * data: "John Doe, age 30, works as a Software Engineer at Tech Corp"
391
+ * };
334
392
  *
335
- * console.log(`Found person: ${person.name}, ${person.age} years old`);
393
+ * const person = await extractStructuredData({
394
+ * content: textContent,
395
+ * model: "gpt-4o",
396
+ * dataSchema: {
397
+ * type: "object",
398
+ * properties: {
399
+ * name: { type: "string" },
400
+ * age: { type: "number" },
401
+ * occupation: { type: "string" },
402
+ * company: { type: "string" }
403
+ * },
404
+ * required: ["name"]
405
+ * },
406
+ * prompt: "Extract person information from the text"
407
+ * });
408
+ *
409
+ * console.log(`Found person: ${person.name}, ${person.age} years old`);
336
410
  * }
337
411
  * ```
338
412
  *
339
413
  * @example
340
- * ```typescript Multiple Content Items
341
- * import { extractStructuredData } from '@intuned/browser/ai';
342
- * export default async function handler(params, page, context){
343
- * const mixedContent = [
344
- * { type: "text", data: "Product: iPhone 15" },
345
- * { type: "image-url", image_type: "jpeg", data: "https://mintcdn.com/intuned-7/asXJUUPBWwDlStUB/logo/light.svg?fit=max&auto=format&n=asXJUUPBWwDlStUB&q=85&s=6525c0b299b3226464eba6afa9b7ebe6" }
346
- * ];
414
+ * ```typescript List Extraction from Text Content
415
+ * import { extractStructuredData, TextContentItem } from '@intuned/browser/ai';
416
+ * import { BrowserContext, Page } from "playwright";
347
417
  *
418
+ * interface Params {}
348
419
  *
349
- * const product = await extractStructuredData({
350
- * content: mixedContent,
351
- * model: "claude-haiku-4-5-20251001",
352
- * dataSchema: {
353
- * type: "object",
354
- * properties: {
355
- * name: { type: "string" },
356
- * price: { type: "string" },
357
- * features: { type: "array", items: { type: "string" } }
420
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
421
+ * const textContent: TextContentItem = {
422
+ * type: "text",
423
+ * data: "iPhone 15 - $999, Samsung Galaxy - $899, Pixel 8 - $699"
424
+ * };
425
+ *
426
+ * const products = await extractStructuredData({
427
+ * content: textContent,
428
+ * model: "gpt-4o",
429
+ * dataSchema: {
430
+ * type: "object",
431
+ * properties: {
432
+ * products: {
433
+ * type: "array",
434
+ * items: {
435
+ * type: "object",
436
+ * properties: {
437
+ * name: { type: "string" },
438
+ * price: { type: "string" }
439
+ * }
440
+ * }
441
+ * }
442
+ * }
443
+ * },
444
+ * prompt: "Extract all products"
445
+ * });
446
+ *
447
+ * for (const product of products.products) {
448
+ * console.log(`${product.name}: ${product.price}`);
358
449
  * }
359
- * },
360
- * maxRetries: 1,
361
- * enableCache: true
362
- * });
363
450
  * }
364
451
  * ```
365
452
  */
@@ -369,7 +456,7 @@ export declare function extractStructuredData(options: {
369
456
  prompt?: string;
370
457
  maxRetries?: number;
371
458
  enableCache?: boolean;
372
- model: string;
459
+ model?: string;
373
460
  apiKey?: string;
374
461
  }): Promise<any>;
375
462
 
@@ -381,20 +468,26 @@ export declare function extractStructuredData(options: {
381
468
  * @param {Page} input.page - The Playwright page to check
382
469
  * @param {number} [input.timeoutInMs=10000] - Screenshot timeout in milliseconds. Defaults to 10000
383
470
  * @param {string} [input.model="gpt-5-mini-2025-08-07"] - AI model to use for the check. Defaults to "gpt-5-mini-2025-08-07"
384
- * @param {string} [input.apiKey] - Optional API key for the AI service (if provided, will not be billed to your account)
385
- * @returns {Promise<boolean>} Promise resolving to true if page is loaded, false if still loading
471
+ * @param {string} [input.apiKey] - Optional API key for the AI call.
472
+ * @returns {Promise<boolean>} Promise resolving to true if page is loaded, false if still loading.
386
473
  * @example
387
474
  * ```typescript Check Page Loading
388
475
  * import { isPageLoaded } from "@intuned/browser/ai";
389
- * export default async function handler(params, page, context){
476
+ * import { BrowserContext, Page } from "playwright";
477
+ *
478
+ * interface Params {}
479
+ *
480
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
390
481
  * // Wait for page to finish loading
391
- * await page.goto('https://example.com');
482
+ * await page.goto('https://sandbox.intuned.dev/');
392
483
  *
393
484
  * const pageLoaded = await isPageLoaded({page});
394
485
  * if (pageLoaded) {
395
486
  * // Continue with scraping or interactions
487
+ * console.log("Page is loaded");
396
488
  * } else {
397
489
  * // Wait longer or retry
490
+ * await page.waitForTimeout(5000);
398
491
  * }
399
492
  * }
400
493
  * ```
@@ -402,24 +495,27 @@ export declare function extractStructuredData(options: {
402
495
  * @example
403
496
  * ```typescript Loading Loop
404
497
  * import { isPageLoaded } from "@intuned/browser/ai";
405
- * export default async function handler(params, page, context){
498
+ * import { BrowserContext, Page } from "playwright";
499
+ *
500
+ * interface Params {}
501
+ *
502
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
406
503
  * // Keep checking until page loads
407
504
  * await page.goto("https://example.com");
408
505
  * let attempts = 0;
409
- * while (attempts < 10) {
506
+ * while (attempts < 10) { // We will retry up to 10 times with a 2-second delay between attempts.
410
507
  * const pageLoaded = await isPageLoaded({
411
508
  * page,
412
- * model: "claude-haiku-4-5-20251001",
509
+ * model: "claude-3-7-sonnet-latest",
413
510
  * timeoutInMs: 5000
414
511
  * });
415
- * if (pageLoaded) break;
512
+ * if (pageLoaded) break; // If the page is loaded, break the loop.
416
513
  *
417
- * await page.waitForTimeout(2000);
514
+ * await page.waitForTimeout(2000); // Wait for 2 seconds before the next attempt.
418
515
  * attempts++;
419
516
  * }
420
517
  * }
421
518
  * ```
422
- * }
423
519
  */
424
520
  export declare function isPageLoaded(input: {
425
521
  page: Page;
@@ -429,56 +525,108 @@ export declare function isPageLoaded(input: {
429
525
  }): Promise<boolean>;
430
526
 
431
527
  /**
432
- * Represents text content for AI extraction.
433
- * Used when passing text data directly to extractStructuredData without a page source.
528
+ * Text content item for content-based extraction.
434
529
  *
435
530
  * @interface TextContentItem
436
- * @property {string} type - The type of the content item, which is always "text"
437
- * @property {string} data - The text content to extract data from
438
531
  */
439
532
  export interface TextContentItem {
533
+ /** The type of the content item, which is always "text". */
440
534
  type: "text";
535
+ /** The text data to extract from. */
441
536
  data: string;
442
537
  }
443
538
 
444
539
  /**
445
- * Represents image content provided as a Buffer for AI extraction.
446
- * Used when passing image data directly to extractStructuredData without a page source.
447
- * The image will be analyzed by AI vision models for data extraction.
540
+ * Image buffer content item for content-based extraction.
448
541
  *
449
542
  * @interface ImageBufferContentItem
450
- * @property {string} type - The type of the content item, which is always "image-buffer"
451
- * @property {string} image_type - The image format (e.g., "png", "jpeg", "gif", "webp")
452
- * @property {Buffer} data - The Buffer containing the raw image data
453
543
  */
454
544
  export interface ImageBufferContentItem {
545
+ /** The type of the content item, which is always "image-buffer". */
455
546
  type: "image-buffer";
547
+ /** The image format (e.g., "png", "jpeg", "gif", "webp"). */
456
548
  image_type: "png" | "jpeg" | "gif" | "webp";
549
+ /** The Buffer containing the raw image data. */
457
550
  data: Buffer;
458
551
  }
459
552
 
460
553
  /**
461
- * Represents image content provided as a URL for AI extraction.
462
- * Used when passing image URLs directly to extractStructuredData without a page source.
463
- * The image will be fetched from the URL and analyzed by AI vision models for data extraction.
554
+ * Image URL content item for content-based extraction.
464
555
  *
465
556
  * @interface ImageUrlContentItem
466
- * @property {string} type - The type of the content item, which is always "image-url"
467
- * @property {string} image_type - The image format (e.g., "png", "jpeg", "gif", "webp")
468
- * @property {string} data - The URL of the image to fetch and analyze
469
557
  */
470
558
  export interface ImageUrlContentItem {
559
+ /** The type of the content item, which is always "image-url". */
471
560
  type: "image-url";
561
+ /** The image format (e.g., "png", "jpeg", "gif", "webp"). */
472
562
  image_type: "png" | "jpeg" | "gif" | "webp";
563
+ /** The URL of the image. */
473
564
  data: string;
474
565
  }
475
566
 
476
567
  /**
477
- * Union type representing all content items for AI data extraction.
568
+ * A union type representing content items for AI data extraction from various content types.
569
+ *
570
+ * This type alias defines the complete set of content types supported by the content-based
571
+ * extractStructuredData function for extracting data from text, image buffers, or image URLs
572
+ * without requiring a page source.
573
+ *
574
+ * **Type variants:**
575
+ * - `TextContentItem`: [TextContentItem](../type-references/TextContentItem) for text data extraction
576
+ * - `ImageBufferContentItem`: [ImageBufferContentItem](../type-references/ImageBufferContentItem) for image data stored as Buffer
577
+ * - `ImageUrlContentItem`: [ImageUrlContentItem](../type-references/ImageUrlContentItem) for image data accessible via URL
578
+ *
478
579
  * @type ContentItem
479
- * @property {TextContentItem} type - [TextContentItem](../interfaces/TextContentItem) type. Used when passing text data directly to extractStructuredData without a page source.
480
- * @property {ImageBufferContentItem} type - [ImageBufferContentItem](../interfaces/ImageBufferContentItem) type. Used when passing image data directly to extractStructuredData without a page source.
481
- * @property {ImageUrlContentItem} type - [ImageUrlContentItem](../interfaces/ImageUrlContentItem) type. Used when passing image URLs directly to extractStructuredData without a page source.
580
+ *
581
+ * @example
582
+ * ```typescript Text Content
583
+ * import { TextContentItem } from "@intuned/browser/ai";
584
+ * import { BrowserContext, Page } from "playwright";
585
+ *
586
+ * interface Params {}
587
+ *
588
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
589
+ * const textContent: TextContentItem = {
590
+ * type: "text",
591
+ * data: "John Doe, age 30, works as a Software Engineer at Tech Corp"
592
+ * };
593
+ * }
594
+ * ```
595
+ *
596
+ * @example
597
+ * ```typescript Image Buffer Content
598
+ * import { ImageBufferContentItem } from "@intuned/browser/ai";
599
+ * import { BrowserContext, Page } from "playwright";
600
+ *
601
+ * interface Params {}
602
+ *
603
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
604
+ * // Assuming you have image data as Buffer
605
+ * const imageData = fs.readFileSync("image.png");
606
+ *
607
+ * const imageContent: ImageBufferContentItem = {
608
+ * type: "image-buffer",
609
+ * image_type: "png",
610
+ * data: imageData
611
+ * };
612
+ * }
613
+ * ```
614
+ *
615
+ * @example
616
+ * ```typescript Image URL Content
617
+ * import { ImageUrlContentItem } from "@intuned/browser/ai";
618
+ * import { BrowserContext, Page } from "playwright";
619
+ *
620
+ * interface Params {}
621
+ *
622
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
623
+ * const imageContent: ImageUrlContentItem = {
624
+ * type: "image-url",
625
+ * image_type: "jpeg",
626
+ * data: "https://example.com/image.jpg"
627
+ * };
628
+ * }
629
+ * ```
482
630
  */
483
631
  export type ContentItem =
484
632
  | TextContentItem