npm - @intuned/browser-dev - Versions diffs - 2.2.3-unify-sdks.20 - Mend

@intuned/browser-dev 2.2.3-unify-sdks.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (195) hide show

package/dist/browser/ai/index.d.ts ADDED Viewed

@@ -0,0 +1,587 @@
+import { Locator, Page } from "playwright-core";
+import { ObjectSchema } from "./jsonSchema";
+import { JSONSchema7TypeName } from "json-schema";
+/**
+ * Common shape shared by the schema variants declared in this file: a JSON
+ * Schema `type` plus an optional free-text description.
+ *
+ * @interface BaseSchema
+ */
+export interface BaseSchema {
+  /** Free-text explanation of what the described value represents (may be omitted) */
+  description?: string;
+  /** A single JSON Schema type name, or an array of type names */
+  type: Array<JSONSchema7TypeName> | JSONSchema7TypeName;
+}
+/**
+ * Describes a string value together with its optional validation rules.
+ *
+ * @interface StringSchema
+ * @extends BaseSchema
+ * @example
+ * ```typescript
+ * const nameSchema: StringSchema = {
+ *   type: "string",
+ *   minLength: 2,
+ *   maxLength: 50,
+ *   pattern: "^[A-Za-z\\s]+$",
+ *   description: "Person's full name"
+ * };
+ * ```
+ */
+export interface StringSchema extends BaseSchema {
+  /** Discriminator: always the literal "string" */
+  type: "string";
+  /** Shortest string length that is accepted */
+  minLength?: number;
+  /** Longest string length that is accepted */
+  maxLength?: number;
+  /** Regular expression source the value has to match */
+  pattern?: string;
+  /** Closed list of permitted string values */
+  enum?: Array<string>;
+}
+/**
+ * Describes a numeric value ("number" or "integer") together with its
+ * optional range and divisibility rules.
+ *
+ * @interface NumberSchema
+ * @extends BaseSchema
+ * @example
+ * ```typescript
+ * const ageSchema: NumberSchema = {
+ *   type: "integer",
+ *   minimum: 0,
+ *   maximum: 150,
+ *   description: "Person's age in years"
+ * };
+ * ```
+ */
+export interface NumberSchema extends BaseSchema {
+  /** Discriminator: either "integer" or "number" */
+  type: "integer" | "number";
+  /** Lower bound; the bound itself is allowed */
+  minimum?: number;
+  /** Lower bound; the bound itself is not allowed */
+  exclusiveMinimum?: number;
+  /** Upper bound; the bound itself is allowed */
+  maximum?: number;
+  /** Upper bound; the bound itself is not allowed */
+  exclusiveMaximum?: number;
+  /** The value has to be a multiple of this number */
+  multipleOf?: number;
+}
+/**
+ * Describes a boolean value. It adds no constraints beyond what BaseSchema
+ * already carries; only the `type` is narrowed.
+ *
+ * @interface BooleanSchema
+ * @extends BaseSchema
+ * @example
+ * ```typescript
+ * const isActiveSchema: BooleanSchema = {
+ *   type: "boolean",
+ *   description: "Whether the user account is active"
+ * };
+ * ```
+ */
+export interface BooleanSchema extends BaseSchema {
+  /** Discriminator: always the literal "boolean" */
+  type: "boolean";
+}
+/**
+ * Describes an array value: the schema of its items plus optional size and
+ * uniqueness rules.
+ *
+ * @interface ArraySchema
+ * @extends BaseSchema
+ * @example
+ * ```typescript
+ * const tagsSchema: ArraySchema = {
+ *   type: "array",
+ *   items: { type: "string" },
+ *   minItems: 1,
+ *   maxItems: 10,
+ *   uniqueItems: true,
+ *   description: "List of tags"
+ * };
+ * ```
+ */
+export interface ArraySchema extends BaseSchema {
+  /** Discriminator: always the literal "array" */
+  type: "array";
+  /** Schema that describes the array's items (required) */
+  items: JSONSchema;
+  /** Smallest number of items that is accepted */
+  minItems?: number;
+  /** Largest number of items that is accepted */
+  maxItems?: number;
+  /** When true, no two items may be equal */
+  uniqueItems?: boolean;
+}
+/**
+ * Describes an object value: a schema per property plus optional rules about
+ * which properties are mandatory and how many may be present.
+ *
+ * NOTE(review): this file also imports a symbol named `ObjectSchema` from
+ * "./jsonSchema"; an import and a local declaration with the same name
+ * conflict in TypeScript — confirm which of the two is intended.
+ *
+ * @interface ObjectSchema
+ * @extends BaseSchema
+ * @example
+ * ```typescript
+ * const userSchema: ObjectSchema = {
+ *   type: "object",
+ *   properties: {
+ *     name: { type: "string" },
+ *     email: { type: "string", pattern: "^[^@]+@[^@]+\\.[^@]+$" },
+ *     age: { type: "integer", minimum: 0 }
+ *   },
+ *   required: ["name", "email"],
+ *   description: "User profile information"
+ * };
+ * ```
+ */
+export interface ObjectSchema extends BaseSchema {
+  /** Discriminator: always the literal "object" */
+  type: "object";
+  /** Map from property name to the schema of that property (required) */
+  properties: Record<string, JSONSchema>;
+  /** Names of the properties that have to be present */
+  required?: Array<string>;
+  /** Smallest number of properties that is accepted */
+  minProperties?: number;
+  /** Largest number of properties that is accepted */
+  maxProperties?: number;
+}
+/**
+ * Extract structured data from web pages using AI-powered content analysis.
+ *
+ * This function provides intelligent data extraction from web pages using various strategies
+ * including HTML parsing, image analysis, and Markdown conversion. It supports extraction
+ * from entire pages or specific elements, with built-in caching and retry mechanisms.
+ *
+ * @param options - Configuration object containing extraction parameters
+ * @param {Page | Locator} options.source - Required. Playwright Page object to extract data from the entire page, or Locator object to extract data from a specific element
+ * @param {JSONSchema} options.dataSchema - Required. [JSONSchema](../interfaces/JSONSchema) defining the structure of the data to extract
+ * @param {string} [options.prompt] - Optional prompt to guide the extraction process and provide more context
+ * @param {string} [options.strategy] - Type of extraction: "HTML", "IMAGE", or "MARKDOWN". This is the plain string literal, not a strategy object.
+ * @param {SUPPORTED_MODELS} [options.model] - AI model to use for extraction (e.g., "gpt-4o", "claude-3-5-haiku-latest"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models.
+ * @param {string} [options.apiKey] - Optional API key for AI extraction (if provided, will not be billed to your account)
+ * @param {boolean} [options.enableDomMatching=false] - Whether to enable DOM element matching during extraction. Defaults to false. When matching is enabled, all types in the schema must be strings to match with the DOM elements. The extracted results will be matched with the DOM elements and returned, then will be cached in a smart fashion so that the next time the same data is extracted, the result will be returned from the cache even if the DOM has minor changes. NOTE(review): earlier wording said "disable" and "when set to false"; the reading given here follows the option name — confirm against the implementation.
+ * @param {boolean} [options.enableCache=true] - Whether to enable caching of the extracted data. Defaults to true.
+ * @param {number} [options.maxRetries=3] - Maximum number of retry attempts on failure. Defaults to 3.
+ *
+ * @returns Promise resolving to the extracted structured data matching the provided schema (typed as `any`)
+ * @example
+ * ```typescript Extract Product Information from Entire Page
+ * import { extractStructuredData } from './extractors';
+ *
+ * const productSchema = {
+ *   type: "object",
+ *   properties: {
+ *     name: { type: "string" },
+ *     price: { type: "string" },
+ *     description: { type: "string" },
+ *     inStock: { type: "boolean" }
+ *   },
+ *   required: ["name", "price"]
+ * };
+ *
+ * const product = await extractStructuredData({
+ *   source: page,
+ *   strategy: "HTML",
+ *   model: "gpt-4",
+ *   dataSchema: productSchema,
+ *   prompt: "Extract product details from this e-commerce page"
+ * });
+ *
+ * console.log(`Found product: ${product.name} - ${product.price}`);
+ * ```
+ *
+ * @example
+ * ```typescript Extract Article Data from Specific Element
+ * import { extractStructuredData } from './extractors';
+ *
+ * const articleSchema = {
+ *   type: "object",
+ *   properties: {
+ *     title: { type: "string" },
+ *     author: { type: "string" },
+ *     publishDate: { type: "string" },
+ *     content: { type: "string" },
+ *     tags: { type: "array", items: { type: "string" } }
+ *   },
+ *   required: ["title", "content"]
+ * };
+ *
+ * const articleContainer = page.locator("article.main-content");
+ * const article = await extractStructuredData({
+ *   source: articleContainer,
+ *   strategy: "MARKDOWN",
+ *   model: "claude-3-5-sonnet-20240620",
+ *   dataSchema: articleSchema,
+ *   maxRetries: 5
+ * });
+ *
+ * console.log(`Article: ${article.title} by ${article.author}`);
+ * ```
+ *
+ * @example
+ * ```typescript Extract Data from Screenshots using Image Strategy
+ * import { extractStructuredData } from './extractors';
+ *
+ * const chartSchema = {
+ *   type: "object",
+ *   properties: {
+ *     title: { type: "string" },
+ *     dataPoints: {
+ *       type: "array",
+ *       items: {
+ *         type: "object",
+ *         properties: {
+ *           label: { type: "string" },
+ *           value: { type: "number" }
+ *         }
+ *       }
+ *     }
+ *   }
+ * };
+ *
+ * const chartElement = page.locator("#data-visualization");
+ * const chartData = await extractStructuredData({
+ *   source: chartElement,
+ *   strategy: "IMAGE",
+ *   model: "gpt-4o",
+ *   dataSchema: chartSchema,
+ *   prompt: "Extract the chart title and all data points with their values"
+ * });
+ *
+ * console.log(`Chart: ${chartData.title}`);
+ * chartData.dataPoints.forEach(point => {
+ *   console.log(`${point.label}: ${point.value}`);
+ * });
+ * ```
+ */
+export declare function extractStructuredData(options: {
+  source: Page | Locator;
+  dataSchema: JSONSchema;
+  prompt?: string;
+  strategy?: "IMAGE" | "MARKDOWN" | "HTML";
+  model?: SUPPORTED_MODELS;
+  apiKey?: string;
+  enableDomMatching?: boolean;
+  enableCache?: boolean;
+  maxRetries?: number;
+}): Promise<any>;
+/**
+ * String-literal union of the AI model identifiers accepted for data extraction.
+ * It includes models from OpenAI, Anthropic, and Google Gemini.
+ * The `model` option of `extractStructuredData` and the `model` member of the
+ * strategy interfaces in this file are typed with it.
+ *
+ * The alias is declared without `export`, so it is visible inside this
+ * declaration file only.
+ * @type SUPPORTED_MODELS
+ */
+type SUPPORTED_MODELS =
+  | "claude-opus-4-1-20250805"
+  | "claude-opus-4-20250514"
+  | "claude-sonnet-4-20250514"
+  | "claude-3-7-sonnet-20250219"
+  | "claude-3-5-sonnet-20240620"
+  | "claude-3-5-haiku-latest"
+  | "gpt-5"
+  | "gpt-5-mini"
+  | "gpt-5-nano"
+  | "gpt-5-chat"
+  | "gpt-5-chat-latest"
+  | "gpt-5-2025-08-07"
+  | "gpt-5-mini-2025-08-07"
+  | "gpt-5-nano-2025-08-07"
+  | "gpt-4.1"
+  | "gpt-4.1-mini"
+  | "gpt-4.1-nano"
+  | "o4-mini"
+  | "o3-mini"
+  | "o3"
+  | "o1-mini"
+  | "o1-preview"
+  | "gpt-4o-mini"
+  | "gpt-4o-mini-2024-07-18"
+  | "gpt-4o"
+  | "gpt-4o-2024-08-06"
+  | "gpt-4o-2024-05-13"
+  | "gpt-4-turbo"
+  | "gpt-4-turbo-preview"
+  | "gpt-4-0125-preview"
+  | "gpt-4-1106-preview"
+  | "gpt-3.5-turbo-1106"
+  | "gpt-3.5-turbo"
+  | "gpt-3.5-turbo-0301"
+  | "gpt-3.5-turbo-0613"
+  | "gpt-3.5-turbo-16k"
+  | "gpt-3.5-turbo-16k-0613"
+  | "gpt-4"
+  | "gpt-4-0314"
+  | "gpt-4-0613"
+  | "gpt-4-32k"
+  | "gpt-4-32k-0314"
+  | "gpt-4-32k-0613"
+  | "gemini-pro"
+  | "gemini-1.5-pro-latest"
+  | "gemini-2.0-flash"
+  | "gemini-2.0-flash-exp"
+  | "gemini-2.0-flash-lite-preview-02-05";
+/**
+ * A JSON Schema definition used to describe the shape of a data structure.
+ *
+ * The alias is the union of the schema interfaces declared in this file:
+ * - BaseSchema: only `type` and an optional `description`
+ * - StringSchema: strings, with length, pattern and enumeration constraints
+ * - NumberSchema: numbers/integers, with range and divisibility constraints
+ * - BooleanSchema: boolean values
+ * - ArraySchema: arrays, with an item schema and size constraints
+ * - ObjectSchema: objects, with per-property schemas and property-count constraints
+ *
+ * @type JSONSchema
+ * @example
+ * ```typescript String Schema
+ * const stringSchema: JSONSchema = {
+ *   type: "string",
+ *   minLength: 3,
+ *   maxLength: 50,
+ *   pattern: "^[A-Za-z]+$"
+ * };
+ * ```
+ *
+ * @example
+ * ```typescript Number Schema
+ * const numberSchema: JSONSchema = {
+ *   type: "number",
+ *   minimum: 0,
+ *   maximum: 100,
+ *   multipleOf: 0.5
+ * };
+ * ```
+ *
+ * @example
+ * ```typescript Array Schema
+ * const arraySchema: JSONSchema = {
+ *   type: "array",
+ *   items: {
+ *     type: "string"
+ *   },
+ *   minItems: 1,
+ *   maxItems: 10,
+ *   uniqueItems: true
+ * };
+ * ```
+ *
+ * @example
+ * ```typescript Object Schema
+ * const objectSchema: JSONSchema = {
+ *   type: "object",
+ *   properties: {
+ *     name: { type: "string" },
+ *     age: { type: "number", minimum: 0 },
+ *     email: { type: "string", pattern: "^[^@]+@[^@]+\\.[^@]+$" }
+ *   },
+ *   required: ["name", "email"]
+ * };
+ * ```
+ */
+export type JSONSchema =
+  | BaseSchema
+  | StringSchema
+  | NumberSchema
+  | BooleanSchema
+  | ArraySchema
+  | ObjectSchema;
+/**
+ * @interface HTMLStrategy
+ * Represents a strategy for extracting data from HTML content using AI models.
+ *
+ * This strategy processes the HTML structure of a page or element, focusing on semantic attributes
+ * for better context understanding. It automatically filters and includes only relevant HTML attributes:
+ * `aria-label`, `data-name`, `name`, `type`, `placeholder`, `value`, `role`, `title`, `href`, `id`, `alt`
+ *
+ * The `extractStructuredData` declaration in this file takes `strategy` as a plain string
+ * and `model` as a separate option, so the examples pass the two members individually.
+ *
+ * @param {SUPPORTED_MODELS} model - AI model to use for extraction (e.g., "gpt-4", "claude-3-5-sonnet-20240620"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models.
+ * @param {string} type - Type of extraction: 'HTML'
+ *
+ * @example
+ * ```typescript Basic HTML Extraction
+ * const htmlStrategy: HTMLStrategy = {
+ *   type: "HTML",
+ *   model: "gpt-4"
+ * };
+ *
+ * const data = await extractStructuredData({
+ *   source: page,
+ *   strategy: htmlStrategy.type,
+ *   model: htmlStrategy.model,
+ *   dataSchema: productSchema
+ * });
+ * ```
+ *
+ * @example
+ * ```typescript Advanced HTML Extraction
+ * const htmlStrategy: HTMLStrategy = {
+ *   type: "HTML",
+ *   model: "claude-3-5-sonnet-20240620"
+ * };
+ *
+ * // Extract product details from a specific container
+ * const productData = await extractStructuredData({
+ *   source: page.locator('.product-container'),
+ *   strategy: htmlStrategy.type,
+ *   model: htmlStrategy.model,
+ *   dataSchema: productSchema
+ * });
+ * ```
+ */
+export interface HTMLStrategy {
+  /** The AI model to use for content analysis and data extraction */
+  model: SUPPORTED_MODELS;
+  /** Strategy type identifier, must be "HTML" for HTML-based extraction */
+  type: "HTML";
+}
+/**
+ * @interface ImageStrategy
+ * Represents a strategy for extracting data from visual content using AI vision models.
+ *
+ * This strategy captures screenshots of the target page or element and uses AI vision
+ * capabilities to extract information. It's particularly useful for:
+ * - Data embedded in images or charts
+ * - Content with complex visual layouts
+ * - Information that's not directly accessible in the HTML
+ *
+ * The `extractStructuredData` declaration in this file takes `strategy` as a plain string
+ * and `model` as a separate option, so the examples pass the two members individually.
+ *
+ * @param {SUPPORTED_MODELS} model - AI model to use for extraction (e.g., "gpt-4o", "claude-3-5-sonnet-20240620"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models.
+ * @param {string} type - Type of extraction: 'IMAGE'
+ * @example
+ * ```typescript Basic Image Analysis
+ * const imageStrategy: ImageStrategy = {
+ *   type: "IMAGE",
+ *   model: "gpt-4o"
+ * };
+ *
+ * const chartData = await extractStructuredData({
+ *   source: page.locator('.chart-container'),
+ *   strategy: imageStrategy.type,
+ *   model: imageStrategy.model,
+ *   dataSchema: chartSchema
+ * });
+ * ```
+ *
+ * @example
+ * ```typescript Complex Visual Extraction
+ * const imageStrategy: ImageStrategy = {
+ *   type: "IMAGE",
+ *   model: "claude-3-5-sonnet-20240620"
+ * };
+ *
+ * // Extract data from a complex dashboard
+ * const dashboardData = await extractStructuredData({
+ *   source: page,
+ *   strategy: imageStrategy.type,
+ *   model: imageStrategy.model,
+ *   dataSchema: dashboardSchema,
+ *   prompt: "Extract all metrics and their values from this dashboard view"
+ * });
+ * ```
+ */
+export interface ImageStrategy {
+  /** The AI vision model to use for image analysis and data extraction */
+  model: SUPPORTED_MODELS;
+  /** Strategy type identifier, must be "IMAGE" for image-based extraction */
+  type: "IMAGE";
+}
+/**
+ * @interface MarkDownStrategy
+ * Represents a strategy for extracting data from content after converting it to Markdown format.
+ *
+ * This strategy first converts the HTML content to semantic Markdown before processing,
+ * which helps in:
+ * - Preserving content hierarchy and structure
+ * - Removing unnecessary styling and formatting
+ * - Focusing on semantic meaning of the content
+ * - Handling content-heavy pages more efficiently
+ *
+ * The `extractStructuredData` declaration in this file takes `strategy` as a plain string
+ * and `model` as a separate option, so the examples pass the two members individually.
+ *
+ * @param {SUPPORTED_MODELS} model - AI model to use for extraction (e.g., "gpt-4", "claude-3-5-sonnet-20240620"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models.
+ * @param {string} type - Type of extraction: 'MARKDOWN'
+ * @example
+ * ```typescript Basic Article Extraction
+ * const markdownStrategy: MarkDownStrategy = {
+ *   type: "MARKDOWN",
+ *   model: "gpt-4"
+ * };
+ *
+ * const articleData = await extractStructuredData({
+ *   source: page.locator('article'),
+ *   strategy: markdownStrategy.type,
+ *   model: markdownStrategy.model,
+ *   dataSchema: articleSchema
+ * });
+ * ```
+ *
+ * @example
+ * ```typescript Documentation Extraction
+ * const markdownStrategy: MarkDownStrategy = {
+ *   type: "MARKDOWN",
+ *   model: "claude-3-5-sonnet-20240620"
+ * };
+ *
+ * // Extract structured data from documentation pages
+ * const docData = await extractStructuredData({
+ *   source: page,
+ *   strategy: markdownStrategy.type,
+ *   model: markdownStrategy.model,
+ *   dataSchema: documentationSchema,
+ *   prompt: "Extract main concepts, code examples, and API references"
+ * });
+ * ```
+ */
+export interface MarkDownStrategy {
+  /** The AI model to use for processing the Markdown content */
+  model: SUPPORTED_MODELS;
+  /** Strategy type identifier, must be "MARKDOWN" for Markdown-based extraction */
+  type: "MARKDOWN";
+}
+/**
+ * @interface HtmlStrategy
+ * Represents a strategy for extracting data from HTML content using AI models.
+ *
+ * This interface has exactly the members of `HTMLStrategy` (`type: "HTML"` and
+ * `model: SUPPORTED_MODELS`). It extends that interface without adding anything,
+ * so the two spellings stay structurally identical and cannot drift apart.
+ *
+ * @param {SUPPORTED_MODELS} model - AI model to use for extraction (e.g., "gpt-4", "claude-3-5-sonnet-20240620"), see [SUPPORTED_MODELS](../type-aliases/SUPPORTED_MODELS) for all supported models.
+ * @param {string} type - Type of extraction: 'HTML'
+ * @example
+ * ```typescript Basic HTML Extraction
+ * const htmlStrategy: HtmlStrategy = {
+ *   type: "HTML",
+ *   model: "gpt-4"
+ * };
+ * ```
+ * @example
+ * ```typescript Advanced HTML Extraction
+ * const htmlStrategy: HtmlStrategy = {
+ *   type: "HTML",
+ *   model: "claude-3-5-sonnet-20240620"
+ * };
+ * ```
+ */
+export interface HtmlStrategy extends HTMLStrategy {}

package/dist/browser/ai/index.js ADDED Viewed

@@ -0,0 +1,19 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { // defines exports.__esModule = true (presumably the transpiler's ES-module interop marker — confirm)
+  value: true
+});
+Object.defineProperty(exports, "extractStructuredData", { // re-export installed as an enumerable, getter-backed property
+  enumerable: true,
+  get: function () {
+    return _extractStructuredData.extractStructuredData; // looked up on every access instead of being copied at load time
+  }
+});
+Object.defineProperty(exports, "isPageLoaded", { // second re-export, same getter pattern
+  enumerable: true,
+  get: function () {
+    return _isPageLoaded.isPageLoaded; // looked up on every access instead of being copied at load time
+  }
+});
+var _extractStructuredData = require("./extractStructuredData"); // `var` is hoisted, so the getter above may name this binding although it is assigned only here
+var _isPageLoaded = require("./isPageLoaded"); // both modules are required after the export getters exist; keep this ordering

package/dist/browser/ai/isPageLoaded.js ADDED Viewed

@@ -0,0 +1,67 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", {
+  value: true
+});
+exports.isPageLoaded = void 0;
+var _utils = require("../../helpers/utils");
+var _ai = require("ai");
+const isPageLoaded = async (page, options) => {
+  const timeoutInMs = options !== null && options !== void 0 && options.timeoutInMs ? options === null || options === void 0 ? void 0 : options.timeoutInMs : 10000;
+  const screenshotBytes = await page.screenshot({
+    fullPage: false,
+    type: "png",
+    timeout: timeoutInMs
+  });
+  const gateway = _utils.GatewayFactory.createAIGateway();
+  const gatewayModel = gateway.getModel((options === null || options === void 0 ? void 0 : options.model) ?? "gpt-4o-2024-08-06", options === null || options === void 0 ? void 0 : options.apiKey);
+  const base64Image = Buffer.from(screenshotBytes).toString("base64");
+  const response = await (0, _ai.generateText)({
+    model: gatewayModel,
+    messages: [{
+      role: "system",
+      content: `You are a helpful assistant that determines if a webpage finished loading. If the page finished loading, start your answer with 'True'. If the page is loading, start your answer with 'False'. If you are not sure, start your answer with 'Dont know'. In a new line, add a reason to your response.
+Some good cues for determining if a page is loading:
+- Loading spinner
+- Page is blank
+- Some content looks like it's missing
+- Not on splash screen`
+    }, {
+      role: "user",
+      content: [{
+        type: "text",
+        text: "Look at the screenshot and tell me, is the page loading or has it finished loading?"
+      }, {
+        type: "image",
+        image: base64Image
+      }]
+    }]
+  });
+  let llmResult = response.text.trim();
+  if (!llmResult) {
+    throw new Error("LLM response is empty");
+  }
+  llmResult = llmResult.split("\n").filter(line => line.trim() !== "").join("\n");
+  const isTrue = llmResult.includes("True");
+  const isFalse = llmResult.includes("False");
+  const isDontKnow = llmResult.includes("Dont know") || llmResult.includes("Don't know");
+  const lines = llmResult.split("\n");
+  const reason = lines.length > 1 ? lines[1] : null;
+  let result;
+  if (isTrue) {
+    result = true;
+  } else if (isFalse) {
+    result = false;
+  } else if (isDontKnow) {
+    result = "Dont know";
+  } else {
+    throw new Error("LLM result is not valid");
+  }
+  return {
+    status: result,
+    reason: reason || llmResult,
+    cost: response.usage.totalTokens || 0
+  };
+};
+exports.isPageLoaded = isPageLoaded;