npm - @intuned/browser-dev - Versions diffs - 0.1.7-dev.0 → 0.1.9-dev.0 - Mend

@intuned/browser-dev 0.1.7-dev.0 → 0.1.9-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/README.md +85 -143
package/dist/ai/export.d.ts +292 -144
package/dist/ai/extractStructuredDataUsingAi.js +24 -1
package/dist/ai/index.d.ts +292 -144
package/dist/ai/tests/testExtractStructuredData.spec.js +2 -2
package/dist/common/Logger/index.js +2 -2
package/dist/helpers/export.d.ts +703 -577
package/dist/helpers/gotoUrl.js +50 -51
package/dist/helpers/index.d.ts +703 -577
package/dist/helpers/tests/testClickUntilExhausted.spec.js +2 -1
package/dist/helpers/withNetworkSettledWait.js +2 -7
package/dist/optimized-extractors/export.d.ts +17 -18
package/dist/optimized-extractors/index.d.ts +17 -18
package/how-to-generate-docs.md +40 -28
package/package.json +2 -3
package/generated-docs/ai/functions/extractStructuredData.mdx +0 -255
package/generated-docs/ai/functions/isPageLoaded.mdx +0 -89
package/generated-docs/ai/interfaces/ArraySchema.mdx +0 -36
package/generated-docs/ai/interfaces/BasicSchema.mdx +0 -14
package/generated-docs/ai/interfaces/BooleanSchema.mdx +0 -28
package/generated-docs/ai/interfaces/ImageBufferContentItem.mdx +0 -16
package/generated-docs/ai/interfaces/ImageUrlContentItem.mdx +0 -16
package/generated-docs/ai/interfaces/NumberSchema.mdx +0 -35
package/generated-docs/ai/interfaces/ObjectSchema.mdx +0 -39
package/generated-docs/ai/interfaces/StringSchema.mdx +0 -35
package/generated-docs/ai/interfaces/TextContentItem.mdx +0 -14
package/generated-docs/ai/type-aliases/ContentItem.mdx +0 -12
package/generated-docs/ai/type-aliases/JsonSchema.mdx +0 -47
package/generated-docs/ai/type-aliases/SUPPORTED_MODELS.mdx +0 -85
package/generated-docs/helpers/functions/clickButtonAndWait.mdx +0 -63
package/generated-docs/helpers/functions/clickUntilExhausted.mdx +0 -112
package/generated-docs/helpers/functions/downloadFile.mdx +0 -99
package/generated-docs/helpers/functions/extractMarkdown.mdx +0 -56
package/generated-docs/helpers/functions/filterEmptyValues.mdx +0 -51
package/generated-docs/helpers/functions/goToUrl.mdx +0 -124
package/generated-docs/helpers/functions/processDate.mdx +0 -55
package/generated-docs/helpers/functions/resolveUrl.mdx +0 -165
package/generated-docs/helpers/functions/sanitizeHtml.mdx +0 -113
package/generated-docs/helpers/functions/saveFileToS3.mdx +0 -127
package/generated-docs/helpers/functions/scrollToLoadContent.mdx +0 -83
package/generated-docs/helpers/functions/uploadFileToS3.mdx +0 -121
package/generated-docs/helpers/functions/validateDataUsingSchema.mdx +0 -90
package/generated-docs/helpers/functions/waitForDomSettled.mdx +0 -91
package/generated-docs/helpers/functions/withNetworkSettledWait.mdx +0 -76
package/generated-docs/helpers/interfaces/Attachment.mdx +0 -56
package/generated-docs/helpers/interfaces/S3Configs.mdx +0 -52
package/generated-docs/helpers/interfaces/SanitizeHtmlOptions.mdx +0 -22
package/generated-docs/helpers/type-aliases/AttachmentType.mdx +0 -10
package/generated-docs/helpers/type-aliases/FileType.mdx +0 -61
package/generated-docs/helpers/type-aliases/Trigger.mdx +0 -62

package/dist/ai/export.d.ts CHANGED Viewed

@@ -177,6 +177,10 @@ export interface ObjectSchema extends BasicSchema {
  * @example
  * ```typescript Object Schema
  * import { JsonSchema } from "@intuned/browser/ai";
+ * import { BrowserContext, Page } from "playwright";
+ *
+ * interface Params {}
+ *
  * export default async function handler(params, page, context){
  * const schema: JsonSchema = {
  *   type: "object",
@@ -207,73 +211,122 @@ export type JsonSchema =
   | ArraySchema
   | ObjectSchema;
 /**
- * Extract structured data from web pages using AI-powered content analysis.
- * @overload From Page or Locator
+ * Extracts structured data from web pages using AI-powered content analysis.
+ *
  * This function provides intelligent data extraction from web pages using various strategies
- * including HTML parsing, image analysis, and Markdown conversion. It supports extraction
- * from entire pages or specific elements, with built-in caching and retry mechanisms.
+ * including HTML parsing, image analysis, and Markdown conversion, or directly from text or image content.
+ * It supports extraction from entire pages or specific elements, with built-in caching and retry mechanisms.
+ *
+ * @overload Extract From Page or Locator
+ *
+ * Extract data from web pages or specific elements using HTML, IMAGE, or MARKDOWN strategies with DOM matching support.
+ *
+ * ## Features and limitations
+ *
+ * **Features:**
+ * - **Smart caching:** Hashes inputs and uses [KV Cache](https://docs.intunedhq.com/docs/01-learn/recipes/kv-cache) for persistent storage
+ * - **DOM matching:** With `enableDomMatching=true`, values match DOM elements for smart caching
+ * - **Multiple strategies:** HTML, IMAGE, or MARKDOWN based on content type
+ * - **Flexible models:** Use any up-to-date model from Anthropic, OpenAI, or Google based on your needs
+ *
+ * **Limitations:**
+ * - **Model variability:** Quality varies by model—experiment to find the best fit
+ * - **DOM complexity:** Dynamic structures can affect caching and matching
+ * - **IMAGE strategy constraints:** Can't capture truncated or off-screen content
+ * - **Schema design:** Complex schemas may reduce accuracy
  *
  * @param {Object} options - Configuration object containing extraction parameters
- * @param {Page | Locator} options.source - Playwright Page object to extract data from the entire page or Locator object to extract data from a specific element
- * @param {JsonSchema | z.ZodSchema} options.dataSchema - JsonSchema defining the structure of the data to extract. This can be a JsonSchema or ZodSchema
- * @param {string} [options.strategy="HTML"] - Type of extraction: "HTML", "IMAGE", or "MARKDOWN". Defaults to "HTML"
- * @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context
- * @param {boolean} [options.enableDomMatching=false] - Whether to enable DOM element matching during extraction. Defaults to false. When set to true, all types in the schema must be strings to match with the DOM elements. The extracted results will be matched with the DOM elements and returned, then cached in a smart fashion so that the next time the same data is extracted, the result will be returned from the cache even if the DOM has minor changes.
- * @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true
- * @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
- * @param {string} [options.model="claude-haiku-4-5-20251001"] - AI model to use for extraction. Defaults to "claude-haiku-4-5-20251001"
- * @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
- *
- * @returns Promise resolving to the extracted structured data matching the provided schema
+ * @param {Page | Locator} options.source - Playwright Page object to extract data from the entire page or Locator object to extract data from a specific element.
+ * @param {JsonSchema | z.ZodSchema} options.dataSchema - Schema defining the structure of the data to extract. Can be a JSON Schema object or a Zod schema.
+ * @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context. Defaults to undefined.
+ * @param {("HTML"|"IMAGE"|"MARKDOWN")} [options.strategy="HTML"] - Type of extraction strategy:
+ * - **"HTML"** (default) - Best for text-heavy pages with structured content
+ * - **"IMAGE"** - Best for visual content, charts, or complex layouts
+ * - **"MARKDOWN"** - Best for article-style content with semantic structure
+ * @param {boolean} [options.enableDomMatching=false] - Whether to enable DOM element matching during extraction. You must enable cache for this to work. When enabled, extraction results are mapped to their corresponding DOM elements and returned with matched results. These results are intelligently cached, allowing subsequent extractions with minor DOM changes to utilize the cached data for improved performance. Defaults to false.
+ * @param {boolean} [options.enableCache=true] - Whether to enable caching of extraction results. Defaults to true.
+ * @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3.
+ * @param {string} [options.model="claude-haiku-4-5-20251001"] - AI model to use for extraction. Defaults to "claude-haiku-4-5-20251001".
+ * @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account). Defaults to undefined.
+ *
+ * @returns {Promise<any>} The extracted structured data conforming to the provided schema.
+ *
  * @example
- * ```typescript Page source
+ * ```typescript Extract book details
  * import { extractStructuredData } from '@intuned/browser/ai';
- * export default async function handler(params, page, context){
- * await page.goto("https://books.toscrape.com/")
- * const product = await extractStructuredData({
- *   source: page,
- *   strategy: "HTML",
- *   model: "claude-haiku-4-5-20251001",
- *   dataSchema: {
- *     type: "object",
- *     properties: {
- *       name: { type: "string" },
- *       price: { type: "string" },
- *       description: { type: "string" },
- *       inStock: { type: "boolean" }
+ * import { BrowserContext, Page } from "playwright";
+ *
+ * interface Params {}
+ *
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
+ *   await page.goto('https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html');
+ *   // This will extract the book details from the page, using the HTML strategy with the gpt-4o model.
+ *   // The dataSchema is a JSON Schema object that defines the structure of the data to extract.
+ *   // You can also use a Zod schema instead of a JSON Schema object.
+ *   const book = await extractStructuredData({
+ *     source: page,
+ *     strategy: "HTML", // The HTML strategy is the default strategy and will be used if no strategy is provided.
+ *     model: "gpt-4o",
+ *     dataSchema: {
+ *       type: "object",
+ *       properties: {
+ *         name: { type: "string" },
+ *         price: { type: "string" },
+ *         description: { type: "string" },
+ *         inStock: { type: "boolean" },
+ *         rating: { type: "string" }
+ *       },
+ *       required: ["name", "price"]
  *     },
- *     required: ["name", "price"]
- *   },
- *   prompt: "Extract product details from this e page"
- * });
- * console.log(`Found book: ${product.name} - ${product.price}`);
+ *     prompt: "Extract book details from this page",
+ *     enableCache: true, // The first call invokes the AI; later calls return the cached result as long as the DOM is the same.
+ *     enableDomMatching: true, // Results are returned mapped to their DOM elements. You MUST enable the cache for this to work.
+ *     maxRetries: 3
+ *   });
+ *
+ *   console.log(`Found book: ${book.name} - ${book.price}`);
  * }
  * ```
  *
  * @example
- * ```typescript Locator source
+ * ```typescript Extract all book listings
  * import { extractStructuredData } from '@intuned/browser/ai';
- * export default async function handler(params, page, context){
- * await page.goto("https://books.toscrape.com/")
- * const articleContainer = page.locator("article").first()
- * const article = await extractStructuredData({
- *   source: articleContainer,
- *   strategy: "MARKDOWN",
- *   model: "claude-3-7-sonnet-latest",
- *   dataSchema: {
- *   type: "object",
- *   properties: {
- *     title: { type: "string" },
- *     author: { type: "string" },
- *     publishDate: { type: "string" },
- *     content: { type: "string" },
- *   },
- *   required: ["title"]
- * },
- *   maxRetries: 5
- * });
- * console.log(`Found book: ${article.title}`);
+ * import { BrowserContext, Page } from "playwright";
+ *
+ * interface Params {}
+ *
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
+ *   await page.goto('https://books.toscrape.com/');
+ *   // This will extract all the book listings from the page, using the HTML strategy with the claude-3-7-sonnet-latest model.
+ *   // The dataSchema is a JSON Schema object that defines the structure of the data to extract.
+ *   // You can also use a Zod schema instead of a JSON Schema object.
+ *   const books = await extractStructuredData({
+ *     source: page,
+ *     strategy: "HTML",
+ *     model: "claude-3-7-sonnet-latest",
+ *     dataSchema: {
+ *       type: "object",
+ *       properties: {
+ *         products: {
+ *           type: "array",
+ *           items: {
+ *             type: "object",
+ *             properties: {
+ *               title: { type: "string" },
+ *               price: { type: "string" },
+ *               availability: { type: "string" }
+ *             }
+ *           }
+ *         }
+ *       }
+ *     },
+ *     prompt: "Extract all book listings",
+ *     enableCache: false, // In this example, we don't want to cache the extracted data, we want to extract the data every time.
+ *   });
+ *
+ *   for (const book of books.products) {
+ *     console.log(`${book.title}: ${book.price}`);
+ *   }
  * }
  * ```
  */
@@ -290,76 +343,110 @@ export declare function extractStructuredData(options: {
 }): Promise<any>;
 /**
- * Extract structured data from content items (text, images) using AI-powered analysis.
- * @overload From Content
- * This overload provides a simplified interface for data extraction from various content types
- * without requiring a page source or extraction strategy. It accepts text content, image buffers,
- * or image URLs and extracts structured data according to the provided schema.
+ * Extracts structured data from web pages using AI-powered content analysis.
+ *
+ * This function provides intelligent data extraction from web pages using various strategies
+ * including HTML parsing, image analysis, and Markdown conversion, or directly from text or image content.
+ * It supports extraction from entire pages or specific elements, with built-in caching and retry mechanisms.
+ *
+ * @overload Extract From Content
+ *
+ * Extract data from text, image buffers, or image URLs without requiring a page source.
+ *
+ * ## Features and limitations
+ *
+ * **Features:**
+ * - **Smart caching:** Hashes content and uses [KV Cache](https://docs.intunedhq.com/docs/01-learn/recipes/kv-cache) for persistent storage
+ * - **Multiple content items:** Combine text, images (buffer or URL) for comprehensive extraction
+ * - **Flexible models:** Use any up-to-date model from Anthropic, OpenAI, or Google based on your needs
+ *
+ * **Limitations:**
+ * - **Model variability:** Quality varies by model—experiment to find the best fit
+ * - **Schema design:** Complex schemas may reduce accuracy
+ * - **Content quality:** Requires meaningful, contextual content for accurate extraction—sparse or ambiguous content produces poor results
  *
  * @param {Object} options - Configuration object containing extraction parameters
- * @param {ContentItem[] | ContentItem} options.content - Content to extract data from - can be a single content item or array of content items
- * @param {JsonSchema | z.ZodSchema} options.dataSchema - JsonSchema defining the structure of the data to extract
- * @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context
- * @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true
- * @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3
- * @param {string} options.model - AI model to use for extraction (e.g., "gpt-4", "claude-3"). Defaults to "claude-haiku-4-5-20251001"
- * @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
+ * @param {ContentItem[] | ContentItem} options.content - Content to extract data from - can be a single content item or an array of [ContentItem](../type-references/ContentItem).
+ * @param {JsonSchema | z.ZodSchema} options.dataSchema - Schema defining the expected structure of the extracted data. Can be a JSON Schema object or a Zod schema.
+ * @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context. Defaults to undefined.
+ * @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, API errors, output errors, etc. Defaults to 3.
+ * @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true.
+ * @param {string} [options.model="claude-haiku-4-5-20251001"] - AI model to use for extraction. Defaults to "claude-haiku-4-5-20251001".
+ * @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account). Defaults to undefined.
  *
- * @returns Promise resolving to the extracted structured data matching the provided schema
+ * @returns {Promise<any>} The extracted structured data conforming to the provided schema.
  *
  * @example
- * ```typescript Text Content
- * import { extractStructuredData } from '@intuned/browser/ai';
- * export default async function handler(params, page, context){
- * const textContent: TextContentItem = {
- *   type: "text",
- *   data: "John Doe, age 30, works as a Software Engineer at Tech Corp"
- * };
+ * ```typescript Basic Text Content Extraction
+ * import { extractStructuredData, TextContentItem } from '@intuned/browser/ai';
+ * import { BrowserContext, Page } from "playwright";
  *
- * const person = await extractStructuredData({
- *   content: textContent,
- *   model: "claude-haiku-4-5-20251001",
- *   dataSchema: {
- *   type: "object",
- *   properties: {
- *     name: { type: "string" },
- *     age: { type: "number" },
- *     occupation: { type: "string" },
- *     company: { type: "string" }
- *   },
- *   required: ["name"]
- * },
- *   prompt: "Extract person information from the text"
- * });
+ * interface Params {}
+ *
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
+ *   // This will extract the person information from the text, using the gpt-4o model.
+ *   const textContent: TextContentItem = {
+ *     type: "text",
+ *     data: "John Doe, age 30, works as a Software Engineer at Tech Corp"
+ *   };
  *
- * console.log(`Found person: ${person.name}, ${person.age} years old`);
+ *   const person = await extractStructuredData({
+ *     content: textContent,
+ *     model: "gpt-4o",
+ *     dataSchema: {
+ *       type: "object",
+ *       properties: {
+ *         name: { type: "string" },
+ *         age: { type: "number" },
+ *         occupation: { type: "string" },
+ *         company: { type: "string" }
+ *       },
+ *       required: ["name"]
+ *     },
+ *     prompt: "Extract person information from the text"
+ *   });
+ *
+ *   console.log(`Found person: ${person.name}, ${person.age} years old`);
  * }
  * ```
  *
  * @example
- * ```typescript Multiple Content Items
- * import { extractStructuredData } from '@intuned/browser/ai';
- * export default async function handler(params, page, context){
- * const mixedContent = [
- *   { type: "text", data: "Product: iPhone 15" },
- *   { type: "image-url", image_type: "jpeg", data: "https://mintcdn.com/intuned-7/asXJUUPBWwDlStUB/logo/light.svg?fit=max&auto=format&n=asXJUUPBWwDlStUB&q=85&s=6525c0b299b3226464eba6afa9b7ebe6" }
- * ];
+ * ```typescript List Extraction from Text Content
+ * import { extractStructuredData, TextContentItem } from '@intuned/browser/ai';
+ * import { BrowserContext, Page } from "playwright";
  *
+ * interface Params {}
  *
- * const product = await extractStructuredData({
- *   content: mixedContent,
- *   model: "claude-haiku-4-5-20251001",
- *   dataSchema: {
- *   type: "object",
- *   properties: {
- *     name: { type: "string" },
- *     price: { type: "string" },
- *     features: { type: "array", items: { type: "string" } }
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
+ *   const textContent: TextContentItem = {
+ *     type: "text",
+ *     data: "iPhone 15 - $999, Samsung Galaxy - $899, Pixel 8 - $699"
+ *   };
+ *
+ *   const products = await extractStructuredData({
+ *     content: textContent,
+ *     model: "gpt-4o",
+ *     dataSchema: {
+ *       type: "object",
+ *       properties: {
+ *         products: {
+ *           type: "array",
+ *           items: {
+ *             type: "object",
+ *             properties: {
+ *               name: { type: "string" },
+ *               price: { type: "string" }
+ *             }
+ *           }
+ *         }
+ *       }
+ *     },
+ *     prompt: "Extract all products"
+ *   });
+ *
+ *   for (const product of products.products) {
+ *     console.log(`${product.name}: ${product.price}`);
  *   }
- * },
- *   maxRetries: 1,
- *   enableCache: true
- * });
  * }
  * ```
  */
@@ -369,7 +456,7 @@ export declare function extractStructuredData(options: {
   prompt?: string;
   maxRetries?: number;
   enableCache?: boolean;
-  model: string;
+  model?: string;
   apiKey?: string;
 }): Promise<any>;
@@ -380,21 +467,27 @@ export declare function extractStructuredData(options: {
  * @param {Object} input - Input object containing the page to check
  * @param {Page} input.page - The Playwright page to check
  * @param {number} [input.timeoutInMs=10000] - Screenshot timeout in milliseconds. Defaults to 10000
- * @param {string} [input.model="claude-haiku-4-5-20251001"] - AI model to use for the check. Defaults to "claude-haiku-4-5-20251001"
- * @param {string} [input.apiKey] - Optional API key for the AI service (if provided, will not be billed to your account)
- * @returns {Promise<boolean>} Promise resolving to true if page is loaded, false if still loading
+ * @param {string} [input.model="gpt-5-mini-2025-08-07"] - AI model to use for the check. Defaults to "gpt-5-mini-2025-08-07"
+ * @param {string} [input.apiKey] - Optional API key for the AI call.
+ * @returns {Promise<boolean>} Promise resolving to true if page is loaded, false if still loading.
  * @example
  * ```typescript Check Page Loading
  * import { isPageLoaded } from "@intuned/browser/ai";
- * export default async function handler(params, page, context){
+ * import { BrowserContext, Page } from "playwright";
+ *
+ * interface Params {}
+ *
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
  * // Wait for page to finish loading
- * await page.goto('https://example.com');
+ * await page.goto('https://sandbox.intuned.dev/');
  *
  * const pageLoaded = await isPageLoaded({page});
  * if (pageLoaded) {
  *   // Continue with scraping or interactions
+ *   console.log("Page is loaded");
  * } else {
  *   // Wait longer or retry
+ *   await page.waitForTimeout(5000);
  * }
  * }
  * ```
@@ -402,24 +495,27 @@ export declare function extractStructuredData(options: {
  * @example
  * ```typescript Loading Loop
  * import { isPageLoaded } from "@intuned/browser/ai";
- * export default async function handler(params, page, context){
+ * import { BrowserContext, Page } from "playwright";
+ *
+ * interface Params {}
+ *
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
  * // Keep checking until page loads
  * await page.goto("https://example.com");
  * let attempts = 0;
- * while (attempts < 10) {
+ * while (attempts < 10) { // We will retry up to 10 times with a 2-second delay between attempts.
  *   const pageLoaded = await isPageLoaded({
  *     page,
- *     model: "claude-haiku-4-5-20251001",
+ *     model: "claude-3-7-sonnet-latest",
  *     timeoutInMs: 5000
  *   });
- *   if (pageLoaded) break;
+ *   if (pageLoaded) break; // If the page is loaded, break the loop.
  *
- *   await page.waitForTimeout(2000);
+ *   await page.waitForTimeout(2000); // Wait for 2 seconds before the next attempt.
  *   attempts++;
  * }
  * }
  * ```
- * }
  */
 export declare function isPageLoaded(input: {
   page: Page;
@@ -429,56 +525,108 @@ export declare function isPageLoaded(input: {
 }): Promise<boolean>;
 /**
- * Represents text content for AI extraction.
- * Used when passing text data directly to extractStructuredData without a page source.
+ * Text content item for content-based extraction.
  *
  * @interface TextContentItem
- * @property {string} type - The type of the content item, which is always "text"
- * @property {string} data - The text content to extract data from
  */
 export interface TextContentItem {
+  /** The type of the content item, which is always "text". */
   type: "text";
+  /** The text data to extract from. */
   data: string;
 }
 /**
- * Represents image content provided as a Buffer for AI extraction.
- * Used when passing image data directly to extractStructuredData without a page source.
- * The image will be analyzed by AI vision models for data extraction.
+ * Image buffer content item for content-based extraction.
  *
  * @interface ImageBufferContentItem
- * @property {string} type - The type of the content item, which is always "image-buffer"
- * @property {string} image_type - The image format (e.g., "png", "jpeg", "gif", "webp")
- * @property {Buffer} data - The Buffer containing the raw image data
  */
 export interface ImageBufferContentItem {
+  /** The type of the content item, which is always "image-buffer". */
   type: "image-buffer";
+  /** The image format (e.g., "png", "jpeg", "gif", "webp"). */
   image_type: "png" | "jpeg" | "gif" | "webp";
+  /** The Buffer containing the raw image data. */
   data: Buffer;
 }
 /**
- * Represents image content provided as a URL for AI extraction.
- * Used when passing image URLs directly to extractStructuredData without a page source.
- * The image will be fetched from the URL and analyzed by AI vision models for data extraction.
+ * Image URL content item for content-based extraction.
  *
  * @interface ImageUrlContentItem
- * @property {string} type - The type of the content item, which is always "image-url"
- * @property {string} image_type - The image format (e.g., "png", "jpeg", "gif", "webp")
- * @property {string} data - The URL of the image to fetch and analyze
  */
 export interface ImageUrlContentItem {
+  /** The type of the content item, which is always "image-url". */
   type: "image-url";
+  /** The image format (e.g., "png", "jpeg", "gif", "webp"). */
   image_type: "png" | "jpeg" | "gif" | "webp";
+  /** The URL of the image. */
   data: string;
 }
 /**
- * Union type representing all content items for AI data extraction.
+ * A union type representing content items for AI data extraction from various content types.
+ *
+ * This type alias defines the complete set of content types supported by the content-based
+ * extractStructuredData function for extracting data from text, image buffers, or image URLs
+ * without requiring a page source.
+ *
+ * **Type variants:**
+ * - `TextContentItem`: [TextContentItem](../type-references/TextContentItem) for text data extraction
+ * - `ImageBufferContentItem`: [ImageBufferContentItem](../type-references/ImageBufferContentItem) for image data stored as Buffer
+ * - `ImageUrlContentItem`: [ImageUrlContentItem](../type-references/ImageUrlContentItem) for image data accessible via URL
+ *
  * @type ContentItem
- * @property {TextContentItem} type - [TextContentItem](../interfaces/TextContentItem) type. Used when passing text data directly to extractStructuredData without a page source.
- * @property {ImageBufferContentItem} type - [ImageBufferContentItem](../interfaces/ImageBufferContentItem) type. Used when passing image data directly to extractStructuredData without a page source.
- * @property {ImageUrlContentItem} type - [ImageUrlContentItem](../interfaces/ImageUrlContentItem) type. Used when passing image URLs directly to extractStructuredData without a page source.
+ *
+ * @example
+ * ```typescript Text Content
+ * import { TextContentItem } from "@intuned/browser/ai";
+ * import { BrowserContext, Page } from "playwright";
+ *
+ * interface Params {}
+ *
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
+ *   const textContent: TextContentItem = {
+ *     type: "text",
+ *     data: "John Doe, age 30, works as a Software Engineer at Tech Corp"
+ *   };
+ * }
+ * ```
+ *
+ * @example
+ * ```typescript Image Buffer Content
+ * import { ImageBufferContentItem } from "@intuned/browser/ai";
+ * import * as fs from "fs";
+ * import { BrowserContext, Page } from "playwright";
+ *
+ * interface Params {}
+ *
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
+ *   // Assuming you have image data as Buffer
+ *   const imageData = fs.readFileSync("image.png");
+ *
+ *   const imageContent: ImageBufferContentItem = {
+ *     type: "image-buffer",
+ *     image_type: "png",
+ *     data: imageData
+ *   };
+ * }
+ * ```
+ *
+ * @example
+ * ```typescript Image URL Content
+ * import { ImageUrlContentItem } from "@intuned/browser/ai";
+ * import { BrowserContext, Page } from "playwright";
+ *
+ * interface Params {}
+ *
+ * export default async function handler(params: Params, page: Page, context: BrowserContext){
+ *   const imageContent: ImageUrlContentItem = {
+ *     type: "image-url",
+ *     image_type: "jpeg",
+ *     data: "https://example.com/image.jpg"
+ *   };
+ * }
+ * ```
  */
 export type ContentItem =
   | TextContentItem

package/dist/ai/extractStructuredDataUsingAi.js CHANGED Viewed

@@ -154,7 +154,30 @@ async function extractStructuredDataUsingAi(input) {
       });
       _Logger.logger.info(`Extraction failed,
          Total LLM ${isGateway ? "Cost In Cents" : "Tokens"}: ${accumulatedTokens}`);
-      return (0, _neverthrow.err)(Errors.invalidExtractionResult(error instanceof Error ? error.message : "Unknown error during extraction"));
+      let errorMessage = "Unknown error during extraction";
+      if (error instanceof Error) {
+        errorMessage = error.message;
+        const apiError = error;
+        if (apiError.responseBody) {
+          try {
+            const responseBody = JSON.parse(apiError.responseBody);
+            if (responseBody.error) {
+              if (typeof responseBody.error === "string") {
+                errorMessage = responseBody.error;
+              } else if (responseBody.error.message) {
+                errorMessage = responseBody.error.message;
+              } else {
+                errorMessage = JSON.stringify(responseBody.error);
+              }
+            }
+          } catch {
+            if (typeof apiError.responseBody === "string") {
+              errorMessage = apiError.responseBody;
+            }
+          }
+        }
+      }
+      return (0, _neverthrow.err)(Errors.invalidExtractionResult(errorMessage));
     }
   }
   _Logger.logger.info(`Extraction failed.