npm - @intuned/browser-dev - Versions diffs - 2.2.3-unify-sdks.26 → 2.2.3-unify-sdks.27 - Mend

@intuned/browser-dev 2.2.3-unify-sdks.26 → 2.2.3-unify-sdks.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

package/dist/ai/export.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { Locator, Page } from "playwright-core";
-import { JSONSchema7TypeName } from "json-schema";
 /**
  * Base schema interface that all JSON schema types extend from.
  * Provides common properties like type and description.
@@ -159,22 +159,22 @@ export interface ObjectSchema extends BasicSchema {
  * including HTML parsing, image analysis, and Markdown conversion. It supports extraction
  * from entire pages or specific elements, with built-in caching and retry mechanisms.
  *
- * @param options - Configuration object containing extraction parameters
- * @param {Page | Locator} [options.source] - Playwright Page object to extract data from the entire page or Locator object to extract data from a specific element
+ * @param {Object} options - Configuration object containing extraction parameters
+ * @param {Page | Locator} options.source - Playwright Page object to extract data from the entire page or Locator object to extract data from a specific element
+ * @param {JsonSchema} options.dataSchema - [JsonSchema](../interfaces/JsonSchema) defining the structure of the data to extract
  * @param {SUPPORTED_MODELS} [options.model] - AI model to use for extraction (e.g., "gpt-4", "claude-3"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models.
  * @param {string} [options.strategy] - Type of extraction: "HTML", "IMAGE", or "MARKDOWN"
- * @param {JsonSchema} options.dataSchema - [JsonSchema](../interfaces/JsonSchema) defining the structure of the data to extract
  * @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context
  * @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
  * @param {boolean} [options.enableDomMatching=false] - Whether to enable DOM element matching during extraction. Defaults to false. When set to true, all types in the schema must be strings to match with the DOM elements. The extracted results will be matched with the DOM elements and returned, then will be cached in a smart fashion so that the next time the same data is extracted, the result will be returned from the cache even if the DOM has minor changes.
  * @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. default true
-* @param {integer} [options.retries=3] - Maximum number of retry attempts on failure. default 3
+* @param {integer} [options.retries=3] - Maximum number of retry attempts on failures. Failures can be validation errors, api errors, output errors, etc.
  *
  * @returns Promise resolving to the extracted structured data matching the provided schema
  * @example
  * ```typescript Extract Product Information from Entire Page
- * import { extractStructuredData } from './extractors';
+ * import { extractStructuredData } from '@intuned/browser/ai';
  *
  * const productSchema = {
  *   type: "object",
@@ -188,11 +188,10 @@ export interface ObjectSchema extends BasicSchema {
  * };
  *
  * const product = await extractStructuredData({
- *   page: page,
- *   strategy: { type: "HTML", model: "gpt-4" },
+ *   source: page,
+ *   strategy: "HTML",
+ *   model: "gpt-4o",
  *   dataSchema: productSchema,
- *   entityName: "product",
- *   label: "product-extractor",
  *   prompt: "Extract product details from this e-commerce page"
  * });
  *
@@ -201,7 +200,7 @@ export interface ObjectSchema extends BasicSchema {
  *
  * @example
  * ```typescript Extract Article Data from Specific Element
- * import { extractStructuredData } from './extractors';
+ * import { extractStructuredData } from '@intuned/browser/ai';
  *
  * const articleSchema = {
  *   type: "object",
@@ -217,12 +216,11 @@ export interface ObjectSchema extends BasicSchema {
  *
  * const articleContainer = page.locator("article.main-content");
  * const article = await extractStructuredData({
- *   locator: articleContainer,
- *   strategy: { type: "MARKDOWN", model: "claude-3" },
+ *   source: articleContainer,
+ *   strategy: "MARKDOWN",
+ *   model: "claude-3",
  *   dataSchema: articleSchema,
- *   entityName: "article",
- *   label: "article-extractor",
- *   retries: 5
+ *   maxRetries: 5
  * });
  *
  * console.log(`Article: ${article.title} by ${article.author}`);
@@ -230,7 +228,7 @@ export interface ObjectSchema extends BasicSchema {
  *
  * @example
  * ```typescript Extract Data from Screenshots using Image Strategy
- * import { extractStructuredData } from './extractors';
+ * import { extractStructuredData } from '@intuned/browser/ai';
  *
  * const chartSchema = {
  *   type: "object",
@@ -251,11 +249,10 @@ export interface ObjectSchema extends BasicSchema {
  *
  * const chartElement = page.locator("#data-visualization");
  * const chartData = await extractStructuredData({
- *   locator: chartElement,
- *   strategy: { type: "IMAGE", model: "gpt-4-vision" },
+ *   source: chartElement,
+ *   strategy: "IMAGE",
+ *   model: "gpt-4o",
  *   dataSchema: chartSchema,
- *   entityName: "chart",
- *   label: "chart-extractor",
  *   prompt: "Extract the chart title and all data points with their values"
  * });
  *
@@ -417,229 +414,43 @@ type SUPPORTED_MODELS = SUPPORTED_CLAUDE_MODELS | SUPPORTED_OPENAI_MODELS;
  * ```
  */
-/**
- * @interface HTMLStrategy
- * Represents a strategy for extracting data from HTML content using AI models.
- *
- * This strategy processes the HTML structure of a page or element, focusing on semantic attributes
- * for better context understanding. It automatically filters and includes only relevant HTML attributes:
- * `aria-label`, `data-name`, `name`, `type`, `placeholder`, `value`, `role`, `title`, `href`, `id`, `alt`
- *
- * @param {SUPPORTED_MODELS} model - AI model to use for extraction (e.g., "gpt-4", "claude-3"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models.
- * @param {string} type - Type of extraction: 'HTML'
- *
- * @example
- * ```typescript Basic HTML Extraction
- * const htmlStrategy: HTMLStrategy = {
- *   type: "HTML",
- *   model: "gpt-4"
- * };
- *
- * const data = await extractStructuredData({
- *   page: page,
- *   strategy: htmlStrategy,
- *   // ... other options
- * });
- * ```
- *
- * @example
- * ```typescript Advanced HTML Extraction
- * const htmlStrategy: HTMLStrategy = {
- *   type: "HTML",
- *   model: "claude-3-sonnet-20240620"
- * };
- *
- * // Extract product details from a specific container
- * const productData = await extractStructuredData({
- *   locator: page.locator('.product-container'),
- *   strategy: htmlStrategy,
- *   dataSchema: productSchema,
- *   entityName: "product",
- *   label: "product-extractor"
- * });
- * ```
- */
-export interface HTMLStrategy {
-  /** The AI model to use for content analysis and data extraction */
-  model: SUPPORTED_MODELS;
-  /** Strategy type identifier, must be "HTML" for HTML-based extraction */
-  type: "HTML";
-}
-/**
- * @interface ImageStrategy
- * Represents a strategy for extracting data from visual content using AI vision models.
- *
- * This strategy captures screenshots of the target page or element and uses AI vision
- * capabilities to extract information. It's particularly useful for:
- * - Data embedded in images or charts
- * - Content with complex visual layouts
- * - Information that's not directly accessible in the HTML
- *
- * @param {SUPPORTED_MODELS} model - AI model to use for extraction (e.g., "gpt-4", "claude-3"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models.
- * @param {string} type - Type of extraction: 'IMAGE'
- * @example
- * ```typescript Basic Image Analysis
- * const imageStrategy: ImageStrategy = {
- *   type: "IMAGE",
- *   model: "gpt-4-vision"
- * };
- *
- * const chartData = await extractStructuredData({
- *   locator: page.locator('.chart-container'),
- *   strategy: imageStrategy,
- *   dataSchema: chartSchema,
- *   entityName: "chart",
- *   label: "chart-data-extractor"
- * });
- * ```
- *
- * @example
- * ```typescript Complex Visual Extraction
- * const imageStrategy: ImageStrategy = {
- *   type: "IMAGE",
- *   model: "claude-3-sonnet-20240620"
- * };
- *
- * // Extract data from a complex dashboard
- * const dashboardData = await extractStructuredData({
- *   page: page,
- *   strategy: imageStrategy,
- *   dataSchema: dashboardSchema,
- *   entityName: "dashboard",
- *   label: "dashboard-metrics",
- *   prompt: "Extract all metrics and their values from this dashboard view"
- * });
- * ```
- */
-export interface ImageStrategy {
-  /** The AI vision model to use for image analysis and data extraction */
-  model: SUPPORTED_MODELS;
-  /** Strategy type identifier, must be "IMAGE" for image-based extraction */
-  type: "IMAGE";
-}
-/**
- * @interface MarkDownStrategy
- * Represents a strategy for extracting data from content after converting it to Markdown format.
- *
- * This strategy first converts the HTML content to semantic Markdown before processing,
- * which helps in:
- * - Preserving content hierarchy and structure
- * - Removing unnecessary styling and formatting
- * - Focusing on semantic meaning of the content
- * - Handling content-heavy pages more efficiently
- *
- * @param {SUPPORTED_MODELS} model - AI model to use for extraction (e.g., "gpt-4", "claude-3"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models.
- * @param {string} type - Type of extraction: 'MARKDOWN'
- * @example
- * ```typescript Basic Article Extraction
- * const markdownStrategy: MarkDownStrategy = {
- *   type: "MARKDOWN",
- *   model: "gpt-4"
- * };
- *
- * const articleData = await extractStructuredData({
- *   locator: page.locator('article'),
- *   strategy: markdownStrategy,
- *   dataSchema: articleSchema,
- *   entityName: "article",
- *   label: "article-content"
- * });
- * ```
- *
- * @example
- * ```typescript Documentation Extraction
- * const markdownStrategy: MarkDownStrategy = {
- *   type: "MARKDOWN",
- *   model: "claude-3-sonnet-20240620"
- * };
- *
- * // Extract structured data from documentation pages
- * const docData = await extractStructuredData({
- *   page: page,
- *   strategy: markdownStrategy,
- *   dataSchema: documentationSchema,
- *   entityName: "documentation",
- *   label: "docs-extractor",
- *   prompt: "Extract main concepts, code examples, and API references"
- * });
- * ```
- */
-export interface MarkDownStrategy {
-  /** The AI model to use for processing the Markdown content */
-  model: SUPPORTED_MODELS;
-  /** Strategy type identifier, must be "MARKDOWN" for Markdown-based extraction */
-  type: "MARKDOWN";
-}
-/**
- * @interface HtmlStrategy
- * Represents a strategy for extracting data from HTML content using AI models.
- * @param {SUPPORTED_MODELS} model - AI model to use for extraction (e.g., "gpt-4", "claude-3"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models.
- * @param {string} type - Type of extraction: 'HTML'
- * @example
- * ```typescript Basic HTML Extraction
- * const htmlStrategy: HtmlStrategy = {
- *   type: "HTML",
- *   model: "gpt-4"
- * };
- * ```
- * @example
- * ```typescript Advanced HTML Extraction
- * const htmlStrategy: HtmlStrategy = {
- *   type: "HTML",
- *   model: "claude-3-sonnet-20240620"
- * };
- * ```
- */
-export interface HtmlStrategy {
-  type: "HTML";
-  model: SUPPORTED_MODELS;
-}
 /**
  * Uses AI vision to determine if a webpage has finished loading by analyzing a screenshot.
  * Detects loading spinners, blank content, or incomplete page states.
- *
- * @param {Page} page - The Playwright page to check
- * @param {Object} [options] - Optional configuration object
- * @param {SUPPORTED_MODELS} [options.model="gpt-4o-2024-08-06"] - [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) to use for the AI Check. default gpt-4o-2024-08-06
- * @param {number} [options.timeoutInMs=10000] - Screenshot timeout in milliseconds
- * @param {string} [options.apiKey] - Optional API key for the AI service
- * @returns {Promise.<{status: LoadingStatus, reason: (string|null|undefined), cost: (number|undefined)}>}
- * - `status`: "True" if page is loaded, "False" if still loading, "Dont know" if uncertain
- * - `reason`: Optional reason for the status (e.g., detected loading spinner)
- * - `cost`: Optional cost of the AI analysis (if applicable)
+ * @param {Object} input - Input object containing the page to check
+ * @param {Page} input.page - The Playwright page to check
+ * @param {number} [input.timeoutInMs=10000] - Screenshot timeout in milliseconds
+ * @param {SUPPORTED_MODELS} [input.model="gpt-4o-2024-08-06"] - [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) to use for the AI Check. default gpt-4o-2024-08-06
+ * @param {string} [input.apiKey] - Optional API key for the AI service
+ * @returns {Promise.<boolean>} True if page is loaded, false if still loading
  * @example
  * ```typescript Check Page Loading
- * import { isPageLoaded } from "@intuned/sdk/helpers";
+ * import { isPageLoaded } from "@intuned/browser/ai";
  *
  * // Wait for page to finish loading
  * await page.goto('https://example.com');
  *
- * const pageLoaded = await isPageLoaded(page);
- * if (pageLoaded['status']) {
- *   console.log("Page loaded:", pageLoaded['reason']);
+ * const pageLoaded = await isPageLoaded({page});
+ * if (pageLoaded) {
  *   // Continue with scraping or interactions
  * } else {
- *   console.log("Still loading:", pageLoaded['reason']);
  *   // Wait longer or retry
  * }
  * ```
  *
  * @example
  * ```typescript Loading Loop
- * import { isPageLoaded } from "@intuned/sdk/helpers";
+ * import { isPageLoaded } from "@intuned/browser/ai";
  *
  * // Keep checking until page loads
  * let attempts = 0;
  * while (attempts < 10) {
- *   const pageLoaded = await isPageLoaded(page, "gpt-4o", 5);
- *   if (pageLoaded['status']) break;
+ *   const pageLoaded = await isPageLoaded({
+ *     page,
+ *     model: "gpt-4o",
+ *     timeoutInMs: 5000
+ *   });
+ *   if (pageLoaded) break;
  *
  *   await page.waitForTimeout(2000);
  *   attempts++;
@@ -647,23 +458,13 @@ export interface HtmlStrategy {
  * ```
  *
  */
-export declare function isPageLoaded(
-  page: Page,
-  options?: {
-    timeoutInMs?: number;
-    model?: SUPPORTED_MODELS;
-    apiKey?: string;
-  }
-): Promise<{
-  status: LoadingStatus;
-  reason?: string | null;
-  cost?: number;
-}>;
+export declare function isPageLoaded(input: {
+  page: Page;
+  timeoutInMs?: number;
+  model?: SUPPORTED_MODELS;
+  apiKey?: string;
+}): Promise<boolean>;
-/**
- * LoadingStatus is a union of true, false, and "Dont know".
- */
-export type LoadingStatus = true | false | "Dont know";
 export type JsonSchema =
   | StringSchema
   | NumberSchema