npm - @elizaos/computeruse - Versions diffs - 0.24.21 → 2.0.0-alpha.11 - Mend

@elizaos/computeruse 0.24.21 → 2.0.0-alpha.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/index.d.ts ADDED Viewed

@@ -0,0 +1,1653 @@
+/* tslint:disable */
+/* eslint-disable */
+/* auto-generated by NAPI-RS */
+/**
+ * Click position within element bounds as percentages (0-100).
+ * Referenced by `ActionOptions.clickPosition`.
+ */
+export interface ClickPosition {
+  /** X position as percentage from left edge (0-100). 50 = center. */
+  xPercentage: number
+  /** Y position as percentage from top edge (0-100). 50 = center. */
+  yPercentage: number
+}
+/**
+ * Options for action methods (click, pressKey, scroll, etc.).
+ * Every field is optional; the default stated on each field applies when it is omitted.
+ */
+export interface ActionOptions {
+  /** Whether to highlight the element before performing the action. Defaults to false. */
+  highlightBeforeAction?: boolean
+  /** Whether to capture window screenshot after action. Defaults to true. */
+  includeWindowScreenshot?: boolean
+  /** Whether to capture monitor screenshots after action. Defaults to false. */
+  includeMonitorScreenshots?: boolean
+  /** Whether to try focusing the element before the action. Defaults to true. */
+  tryFocusBefore?: boolean
+  /** Whether to try clicking the element if focus fails. Defaults to true. */
+  tryClickBefore?: boolean
+  /** Whether to capture UI tree before/after action and compute diff. Defaults to false. */
+  uiDiffBeforeAfter?: boolean
+  /** Max depth for tree capture when doing UI diff (see `uiDiffBeforeAfter`). */
+  uiDiffMaxDepth?: number
+  /** Click position within element bounds. If not specified, clicks at center. */
+  clickPosition?: ClickPosition
+  /** Type of click: 'Left', 'Double', or 'Right'. Defaults to 'Left'. */
+  clickType?: ClickType
+  /** Whether to restore cursor to original position after click. Defaults to false. */
+  restoreCursor?: boolean
+  /**
+   * Whether to restore the original focus and caret position after the action. Defaults to false.
+   * When true, saves the currently focused element and caret position before the action, then restores them after.
+   */
+  restoreFocus?: boolean
+}
+/**
+ * Options for typeText method.
+ * `clearBeforeTyping` is the only required field; all others are optional.
+ */
+export interface TypeTextOptions {
+  /**
+   * REQUIRED: Whether to clear existing text before typing.
+   * Set to true to clear the field first, false to append.
+   */
+  clearBeforeTyping: boolean
+  /** Whether to use clipboard for pasting. Defaults to false. */
+  useClipboard?: boolean
+  /** Whether to highlight the element before typing. Defaults to false. */
+  highlightBeforeAction?: boolean
+  /** Whether to capture window screenshot after action. Defaults to true. */
+  includeWindowScreenshot?: boolean
+  /** Whether to capture monitor screenshots after action. Defaults to false. */
+  includeMonitorScreenshots?: boolean
+  /** Whether to try focusing the element before typing. Defaults to true. */
+  tryFocusBefore?: boolean
+  /** Whether to try clicking the element if focus fails. Defaults to true. */
+  tryClickBefore?: boolean
+  /**
+   * Whether to restore the original focus and caret position after typing. Defaults to false.
+   * When true, saves the currently focused element and caret position before typing, then restores them after.
+   */
+  restoreFocus?: boolean
+  /** Whether to capture UI tree before/after action and compute diff. Defaults to false. */
+  uiDiffBeforeAfter?: boolean
+  /** Max depth for tree capture when doing UI diff (see `uiDiffBeforeAfter`). */
+  uiDiffMaxDepth?: number
+}
+/** Result of element validation */
+export interface ValidationResult {
+  /** Whether the element exists */
+  exists: boolean
+  /** The element if found */
+  element?: Element
+  /**
+   * Error message if validation itself failed, i.e. an actual error occurred.
+   * Not set merely because the element was not found.
+   */
+  error?: string
+}
+/**
+ * Bounding box given as (x, y, width, height).
+ * Used for element bounding boxes throughout these declarations.
+ * NOTE(review): units and coordinate space depend on the producer (several users say "screen coordinates") — confirm per use.
+ */
+export interface Bounds {
+  x: number
+  y: number
+  width: number
+  height: number
+}
+/** An (x, y) point. Referenced by `ClickResult.coordinates`. */
+export interface Coordinates {
+  x: number
+  y: number
+}
+/** Result of UI diff capture. Referenced by `ClickResult.uiDiff` and `ActionResult.uiDiff`. */
+export interface UiDiffResult {
+  /** The computed diff showing changes (lines starting with + or -) */
+  diff: string
+  /** Whether any UI changes were detected */
+  hasChanges: boolean
+}
+/** Result of a click operation; returned by `Desktop.clickAtBounds` and `Desktop.clickByIndex`. */
+export interface ClickResult {
+  /** Click method that was used. NOTE(review): the set of possible values is not visible here — confirm. */
+  method: string
+  /** Coordinates that were clicked, if reported */
+  coordinates?: Coordinates
+  /** Details about how the click was performed */
+  details: string
+  /** Path to window screenshot if captured */
+  windowScreenshotPath?: string
+  /** Paths to monitor screenshots if captured */
+  monitorScreenshotPaths?: Array<string>
+  /** UI diff result if `uiDiffBeforeAfter` was enabled */
+  uiDiff?: UiDiffResult
+}
+/** Result of an action operation (typeText, pressKey, scroll, etc.) */
+export interface ActionResult {
+  /** Whether the action succeeded */
+  success: boolean
+  /** Path to window screenshot if captured */
+  windowScreenshotPath?: string
+  /** Paths to monitor screenshots if captured */
+  monitorScreenshotPaths?: Array<string>
+  /** UI diff result if `uiDiffBeforeAfter` was enabled */
+  uiDiff?: UiDiffResult
+}
+/**
+ * Type of mouse click to perform.
+ * Accepted by `ActionOptions.clickType`, `Desktop.clickAtBounds` and `Desktop.clickByIndex`.
+ * The string values are capitalized ('Left', 'Double', 'Right').
+ */
+export const enum ClickType {
+  /** Single left click (default) */
+  Left = 'Left',
+  /** Double left click */
+  Double = 'Double',
+  /** Single right click */
+  Right = 'Right'
+}
+/**
+ * Source of indexed elements for click targeting.
+ * Accepted by `Desktop.clickByIndex` to choose which index namespace an index refers to.
+ */
+export const enum VisionType {
+  /** UI Automation tree elements (default) */
+  UiTree = 'UiTree',
+  /** OCR-detected text elements */
+  Ocr = 'Ocr',
+  /** Omniparser-detected elements */
+  Omniparser = 'Omniparser',
+  /** Gemini Vision-detected elements */
+  Gemini = 'Gemini',
+  /** Browser DOM elements */
+  Dom = 'Dom'
+}
+/** Output of a shell command; resolved by `Desktop.runCommand` and `Desktop.run`. */
+export interface CommandOutput {
+  /** Exit status of the command. Optional — NOTE(review): the conditions under which it is absent are not visible here. */
+  exitStatus?: number
+  /** Captured standard output */
+  stdout: string
+  /** Captured standard error */
+  stderr: string
+}
+/**
+ * Description of a display monitor. Referenced by `ScreenshotResult.monitor` and `MonitorScreenshotPair.monitor`.
+ * NOTE(review): whether x/y/width/height are physical or scaled pixels (see `scaleFactor`) is not visible here — confirm.
+ */
+export interface Monitor {
+  id: string
+  name: string
+  isPrimary: boolean
+  width: number
+  height: number
+  x: number
+  y: number
+  scaleFactor: number
+}
+/** A screenshot result containing image data and dimensions. */
+export interface ScreenshotResult {
+  /** Image width */
+  width: number
+  /** Image height */
+  height: number
+  /** Image data as an array of numbers. NOTE(review): pixel format / encoding is not visible here — confirm. */
+  imageData: Array<number>
+  /** Monitor associated with the screenshot, if any */
+  monitor?: Monitor
+}
+/**
+ * A width/height pair.
+ * NOTE(review): presumably the dimensions of an image after resizing — no usage is visible in this part of the file; confirm.
+ */
+export interface ResizedDimensions {
+  width: number
+  height: number
+}
+/** Pairs a monitor description with a screenshot result. */
+export interface MonitorScreenshotPair {
+  monitor: Monitor
+  screenshot: ScreenshotResult
+}
+/**
+ * Attributes of a UI element; carried by `UINode.attributes`.
+ * Only `role` and `properties` are always present.
+ */
+export interface UIElementAttributes {
+  role: string
+  name?: string
+  label?: string
+  value?: string
+  description?: string
+  /** String-keyed map of additional properties; a value may be a string, undefined or null. */
+  properties: Record<string, string | undefined | null>
+  isKeyboardFocusable?: boolean
+  bounds?: Bounds
+}
+/** Node of a UI tree (see `WindowTreeResult.tree`): an optional id, the element attributes, and child nodes. */
+export interface UINode {
+  id?: string
+  attributes: UIElementAttributes
+  /** Child nodes (recursive structure) */
+  children: Array<UINode>
+}
+/** Entry in index-to-bounds mapping for click targeting (value type of `WindowTreeResult.indexToBounds`) */
+export interface BoundsEntry {
+  /** Element role */
+  role: string
+  /** Element name */
+  name: string
+  /** Bounding box of the element */
+  bounds: Bounds
+  /** Selector for the element, if available. NOTE(review): selector syntax is not visible here. */
+  selector?: string
+}
+/** Result of get_window_tree_result operation with all computed data */
+export interface WindowTreeResult {
+  /** The raw UI tree structure */
+  tree: UINode
+  /** Process ID of the window */
+  pid: number
+  /** Whether this is a browser window */
+  isBrowser: boolean
+  /** Formatted compact YAML output (if `formatOutput` was true) */
+  formatted?: string
+  /** Mapping of index to bounds for click targeting (keys are 1-based indices as strings) */
+  indexToBounds: Record<string, BoundsEntry>
+  /** Total count of indexed elements (elements with bounds) */
+  elementCount: number
+  /** Path to saved window screenshot (if `includeWindowScreenshot` was true) */
+  windowScreenshotPath?: string
+  /** Paths to saved monitor screenshots (if `includeMonitorScreenshots` was true) */
+  monitorScreenshotPaths?: Array<string>
+}
+/** Property loading strategy used when building a UI tree (see `TreeBuildConfig.propertyMode`). */
+export const enum PropertyLoadingMode {
+  /** Only load essential properties (role + name) - fastest */
+  Fast = 'Fast',
+  /** Load all properties for complete element data - slower but comprehensive */
+  Complete = 'Complete',
+  /** Load specific properties based on element type - balanced approach */
+  Smart = 'Smart'
+}
+/** Output format for UI tree (see `TreeBuildConfig.treeOutputFormat`) */
+export const enum TreeOutputFormat {
+  /** Compact YAML format with indexed elements: #1 [ROLE] name */
+  CompactYaml = 'CompactYaml',
+  /** Full JSON format with all fields and properties */
+  VerboseJson = 'VerboseJson',
+  /**
+   * Clustered YAML format: groups elements from all sources (UIA, DOM, OCR, Omniparser, Gemini)
+   * by spatial proximity with prefixed indices (#u1, #d2, #o3, #p4, #g5).
+   * The prefixes correspond to the members of `ElementSource`.
+   */
+  ClusteredYaml = 'ClusteredYaml'
+}
+/**
+ * Source of an element for clustered output (see `ClusteredBoundsEntry.source`).
+ * Each member's comment gives the index prefix used for that source in clustered output.
+ */
+export const enum ElementSource {
+  /** #u - Accessibility tree (UIA) */
+  Uia = 'Uia',
+  /** #d - Browser DOM */
+  Dom = 'Dom',
+  /** #o - OCR text */
+  Ocr = 'Ocr',
+  /** #p - Omniparser vision */
+  Omniparser = 'Omniparser',
+  /** #g - Gemini vision */
+  Gemini = 'Gemini'
+}
+/**
+ * Display mode for inspect overlay labels.
+ * Each member's comment shows the label format drawn for an element.
+ */
+export const enum OverlayDisplayMode {
+  /** Just rectangles, no labels */
+  Rectangles = 'Rectangles',
+  /** [index] only (default) */
+  Index = 'Index',
+  /** [role] only */
+  Role = 'Role',
+  /** [index:role] */
+  IndexRole = 'IndexRole',
+  /** [name] only */
+  Name = 'Name',
+  /** [index:name] */
+  IndexName = 'IndexName',
+  /** [index:role:name] */
+  Full = 'Full'
+}
+/**
+ * Element data for inspect overlay rendering.
+ * Its fields (index, role, name) are the ones `OverlayDisplayMode` label formats refer to.
+ */
+export interface InspectElement {
+  /** 1-based index for click targeting */
+  index: number
+  /** Element role (e.g., "Button", "Edit") */
+  role: string
+  /** Element name if available */
+  name?: string
+  /** Bounding box (x, y, width, height) */
+  bounds: Bounds
+}
+/**
+ * OCR element representing text detected via optical character recognition.
+ * Hierarchy: OcrResult -> OcrLine -> OcrWord
+ * (these are values of the `role` field; the tree root is exposed as `OcrResult.tree`).
+ */
+export interface OcrElement {
+  /** Role type: "OcrResult", "OcrLine", or "OcrWord" */
+  role: string
+  /** The recognized text content */
+  text?: string
+  /** Bounding box in absolute screen coordinates */
+  bounds?: Bounds
+  /** Text rotation angle in degrees (only present on OcrResult) */
+  textAngle?: number
+  /** Confidence score (0.0 to 1.0) if available */
+  confidence?: number
+  /** Child elements (lines for OcrResult, words for OcrLine) */
+  children?: Array<OcrElement>
+}
+/** Result of OCR operation with tree and index-to-bounds mapping (resolved by `Desktop.performOcrForProcess`) */
+export interface OcrResult {
+  /** The OCR tree structure */
+  tree: OcrElement
+  /** Formatted compact YAML output (if `formatOutput` was true) */
+  formatted?: string
+  /**
+   * Mapping of index to bounds for click targeting (keys are 1-based indices as strings)
+   * Value contains (text, bounds)
+   */
+  indexToBounds: Record<string, OcrBoundsEntry>
+  /** Total count of indexed elements (words with bounds) */
+  elementCount: number
+}
+/** Entry in OCR index-to-bounds mapping for click targeting (value type of `OcrResult.indexToBounds`) */
+export interface OcrBoundsEntry {
+  /** Recognized text of the indexed element */
+  text: string
+  /** Bounding box of the indexed element */
+  bounds: Bounds
+}
+/** Browser DOM element captured from a web page (element type of `BrowserDomResult.elements`) */
+export interface BrowserDomElement {
+  /** HTML tag name (lowercase) */
+  tag: string
+  /** Element id attribute */
+  id?: string
+  /** CSS classes */
+  classes: Array<string>
+  /** Visible text content (truncated to 100 chars) */
+  text?: string
+  /** href attribute for links */
+  href?: string
+  /** type attribute for inputs */
+  type?: string
+  /** name attribute */
+  name?: string
+  /** value attribute for inputs */
+  value?: string
+  /** placeholder attribute */
+  placeholder?: string
+  /** aria-label attribute */
+  ariaLabel?: string
+  /** role attribute */
+  role?: string
+  /** Bounding box in screen coordinates */
+  bounds: Bounds
+}
+/** Entry in DOM index-to-bounds mapping for click targeting (value type of `BrowserDomResult.indexToBounds`) */
+export interface DomBoundsEntry {
+  /** Display name (text or aria-label or tag) */
+  name: string
+  /** HTML tag */
+  tag: string
+  /** Bounding box */
+  bounds: Bounds
+}
+/** Result of browser DOM capture operation (resolved by `Desktop.captureBrowserDom`) */
+export interface BrowserDomResult {
+  /** List of captured DOM elements */
+  elements: Array<BrowserDomElement>
+  /** Formatted compact YAML output (if `formatOutput` was true) */
+  formatted?: string
+  /** Mapping of index to bounds for click targeting */
+  indexToBounds: Record<string, DomBoundsEntry>
+  /** Total count of captured elements */
+  elementCount: number
+  /** Page URL */
+  pageUrl: string
+  /** Page title */
+  pageTitle: string
+}
+/** UI element detected by Gemini vision model (element type of `GeminiVisionResult.elements`) */
+export interface VisionElement {
+  /** Element type: text, icon, button, input, checkbox, dropdown, link, image, unknown */
+  elementType: string
+  /** Visible text or label on the element */
+  content?: string
+  /** AI description of what this element is or does */
+  description?: string
+  /** Bounding box in screen coordinates (x, y, width, height) */
+  bounds?: Bounds
+  /** Whether the element is interactive/clickable */
+  interactivity?: boolean
+}
+/** Entry in Gemini vision index-to-bounds mapping for click targeting (value type of `GeminiVisionResult.indexToBounds`) */
+export interface VisionBoundsEntry {
+  /** Display name (content or description) */
+  name: string
+  /** Element type */
+  elementType: string
+  /** Bounding box */
+  bounds: Bounds
+}
+/** Result of Gemini vision detection operation (resolved by `Desktop.performGeminiVisionForProcess`) */
+export interface GeminiVisionResult {
+  /** List of detected UI elements */
+  elements: Array<VisionElement>
+  /** Formatted compact YAML output (if `formatOutput` was true) */
+  formatted?: string
+  /** Mapping of index to bounds for click targeting */
+  indexToBounds: Record<string, VisionBoundsEntry>
+  /** Total count of detected elements */
+  elementCount: number
+}
+/** Item detected by Omniparser V2 (icon/field detection); element type of `OmniparserResult.items` */
+export interface OmniparserItem {
+  /** Element label: "icon", "text", etc. */
+  label: string
+  /** Content or OCR text */
+  content?: string
+  /** Bounding box in screen coordinates (x, y, width, height) */
+  bounds?: Bounds
+}
+/** Entry in Omniparser index-to-bounds mapping for click targeting (value type of `OmniparserResult.indexToBounds`) */
+export interface OmniparserBoundsEntry {
+  /** Display name (content or label) */
+  name: string
+  /** Element label */
+  label: string
+  /** Bounding box */
+  bounds: Bounds
+}
+/** Result of Omniparser detection operation (resolved by `Desktop.performOmniparserForProcess`) */
+export interface OmniparserResult {
+  /** List of detected items */
+  items: Array<OmniparserItem>
+  /** Formatted compact YAML output (if `formatOutput` was true) */
+  formatted?: string
+  /** Mapping of index to bounds for click targeting */
+  indexToBounds: Record<string, OmniparserBoundsEntry>
+  /** Total count of detected items. Note the field is `itemCount`, unlike `elementCount` on the other result types. */
+  itemCount: number
+}
+/**
+ * Entry in clustered index mapping (for click targeting across all sources).
+ * Value type of `ClusteredFormattingResult.indexToSourceAndBounds`.
+ */
+export interface ClusteredBoundsEntry {
+  /** Element source (Uia, Dom, Ocr, Omniparser, Gemini) */
+  source: ElementSource
+  /** Original index within the source */
+  originalIndex: number
+  /** Bounding box in screen coordinates */
+  bounds: Bounds
+}
+/** Result of clustered tree formatting (resolved by `Desktop.getClusteredTree`) */
+export interface ClusteredFormattingResult {
+  /** Formatted clustered YAML output */
+  formatted: string
+  /** Mapping from prefixed index (e.g., "u1", "d2") to source and bounds; keys carry no leading '#' in these examples */
+  indexToSourceAndBounds: Record<string, ClusteredBoundsEntry>
+}
+/**
+ * Configuration for building a UI tree.
+ * `propertyMode` is the only required field; all others are optional.
+ */
+export interface TreeBuildConfig {
+  /** Property loading strategy */
+  propertyMode: PropertyLoadingMode
+  /** Optional timeout per operation in milliseconds */
+  timeoutPerOperationMs?: number
+  /** Optional yield frequency for responsiveness */
+  yieldEveryNElements?: number
+  /** Optional batch size for processing elements */
+  batchSize?: number
+  /** Optional maximum depth to traverse (undefined = unlimited) */
+  maxDepth?: number
+  /** Delay in milliseconds to wait for UI to stabilize before capturing tree */
+  uiSettleDelayMs?: number
+  /** Generate formatted output alongside the tree structure (defaults to true if `treeOutputFormat` is set) */
+  formatOutput?: boolean
+  /** Output format for tree: 'CompactYaml' (default), 'VerboseJson', or 'ClusteredYaml' */
+  treeOutputFormat?: TreeOutputFormat
+  /** Selector to start tree from instead of window root (e.g., "role:Dialog" to focus on a dialog) */
+  treeFromSelector?: string
+  /** Include window screenshot in result (saved to executions dir). Defaults to false. */
+  includeWindowScreenshot?: boolean
+  /** Include all monitor screenshots in result (saved to executions dir). Defaults to false. */
+  includeMonitorScreenshots?: boolean
+  /** Include Gemini Vision AI detection. Elements prefixed with #g1, #g2, etc. */
+  includeGeminiVision?: boolean
+  /** Include Omniparser detection. Elements prefixed with #p1, #p2, etc. */
+  includeOmniparser?: boolean
+  /** Include OCR text detection. Elements prefixed with #o1, #o2, etc. */
+  includeOcr?: boolean
+  /** Include browser DOM elements (requires ComputerUse Bridge extension). Elements prefixed with #d1, #d2, etc. */
+  includeBrowserDom?: boolean
+}
+/**
+ * Nine named positions: the eight compass-style positions plus `Inside`.
+ * NOTE(review): presumably where a text label is placed relative to an element — no usage is visible in this part of the file; confirm.
+ */
+export const enum TextPosition {
+  Top = 'Top',
+  TopRight = 'TopRight',
+  Right = 'Right',
+  BottomRight = 'BottomRight',
+  Bottom = 'Bottom',
+  BottomLeft = 'BottomLeft',
+  Left = 'Left',
+  TopLeft = 'TopLeft',
+  Inside = 'Inside'
+}
+/**
+ * Font settings: size, bold flag and color.
+ * NOTE(review): the unit of `size` and the numeric encoding of `color` are not visible here — confirm before use.
+ */
+export interface FontStyle {
+  size: number
+  bold: boolean
+  color: number
+}
+/** A single step in the computer use execution (element type of `ComputerUseResult.steps`) */
+export interface ComputerUseStep {
+  /** Step number (1-indexed) */
+  step: number
+  /** Action that was executed */
+  action: string
+  /** Arguments passed to the action (as JSON string) */
+  args: string
+  /** Whether the action succeeded */
+  success: boolean
+  /** Error message if action failed */
+  error?: string
+  /** Model's reasoning text for this step */
+  text?: string
+}
+/** Pending confirmation info when safety check triggers (see `ComputerUseResult.pendingConfirmation`) */
+export interface ComputerUsePendingConfirmation {
+  /** Action that needs confirmation */
+  action: string
+  /** Arguments for the action (as JSON string) */
+  args: string
+  /** Model's explanation text */
+  text?: string
+}
+/** Result of the computer use execution */
+export interface ComputerUseResult {
+  /**
+   * Status: "success", "failed", "needs_confirmation", "max_steps_reached".
+   * Typed as a plain string, so the compiler does not check comparisons against these values.
+   */
+  status: string
+  /** The goal that was attempted */
+  goal: string
+  /** Number of steps executed */
+  stepsExecuted: number
+  /** Last action performed */
+  finalAction: string
+  /** Final text response from model */
+  finalText?: string
+  /** History of all steps */
+  steps: Array<ComputerUseStep>
+  /** Pending confirmation info if status is "needs_confirmation" */
+  pendingConfirmation?: ComputerUsePendingConfirmation
+  /** Execution ID for finding screenshots (e.g., "20251205_134500_geminiComputerUse_msedge") */
+  executionId?: string
+}
+/** Result of closing a browser tab */
+export interface CloseTabResult {
+  /** Whether the tab was closed */
+  closed: boolean
+  /** Information about the tab concerned */
+  tab: ClosedTabInfo
+}
+/** Information about a closed tab (see `CloseTabResult.tab`) */
+export interface ClosedTabInfo {
+  /** Tab ID. NOTE(review): presumably the same Chrome tab ID accepted by `CloseTabOptions.tabId` — confirm. */
+  id: number
+  /** Tab URL, if known */
+  url?: string
+  /** Tab title, if known */
+  title?: string
+  /** ID of the window that contained the tab, if known */
+  windowId?: number
+}
+/**
+ * Options for closing a browser tab.
+ * All fields are optional.
+ * NOTE(review): how the criteria combine, and what happens when none is given, is not visible here — confirm.
+ */
+export interface CloseTabOptions {
+  /** Specific Chrome tab ID to close */
+  tabId?: number
+  /** URL to match (partial match supported) */
+  url?: string
+  /** Title to match (case-insensitive partial match) */
+  title?: string
+}
+/**
+ * Information about a window.
+ * NOTE(review): `hwnd` and WS_EX_TOPMOST are Win32 concepts; what these fields hold on other platforms is not visible here.
+ */
+export interface WindowInfo {
+  /** Window handle */
+  hwnd: number
+  /** Process name (e.g., "notepad.exe") */
+  processName: string
+  /** Process ID */
+  processId: number
+  /** Z-order position (0 = topmost) */
+  zOrder: number
+  /** Whether the window is minimized */
+  isMinimized: boolean
+  /** Whether the window is maximized */
+  isMaximized: boolean
+  /** Whether the window has WS_EX_TOPMOST style */
+  isAlwaysOnTop: boolean
+  /** Window title */
+  title: string
+}
+/** Main entry point for desktop automation. */
+export declare class Desktop {
+  /**
+   * Create a new Desktop automation instance with configurable options.
+   * Every argument is optional and also accepts `undefined` or `null`.
+   *
+   * @param {boolean} [useBackgroundApps=false] - Enable background apps support.
+   * @param {boolean} [activateApp=false] - Enable app activation support.
+   * @param {string} [logLevel] - Logging level (e.g., 'info', 'debug', 'warn', 'error').
+   *                              Falls back to RUST_LOG or COMPUTERUSE_LOG_LEVEL env vars, defaults to 'info'.
+   * @returns {Desktop} A new Desktop automation instance.
+   */
+  constructor(useBackgroundApps?: boolean | undefined | null, activateApp?: boolean | undefined | null, logLevel?: string | undefined | null)
+  /**
+   * Get the root UI element of the desktop.
+   * Returns the element directly (not a Promise).
+   *
+   * @returns {Element} The root UI element.
+   */
+  root(): Element
+  /**
+   * Get a list of all running applications.
+   * Returns the list directly (not a Promise).
+   *
+   * @returns {Array<Element>} List of application UI elements.
+   */
+  applications(): Array<Element>
+  /**
+   * Get a running application by name.
+   * NOTE(review): the behavior when no running application matches `name` is not documented here — confirm.
+   *
+   * @param {string} name - The name of the application to find.
+   * @returns {Element} The application UI element.
+   */
+  application(name: string): Element
+  /**
+   * Open an application by name.
+   *
+   * @param {string} name - The name of the application to open.
+   * @param {boolean} [includeWindowScreenshot=true] - Whether to capture window screenshot after opening
+   * @param {boolean} [includeMonitorScreenshots=false] - Whether to capture monitor screenshots after opening
+   * @returns {Element} A UI element. NOTE(review): presumably the opened application's element — confirm.
+   */
+  openApplication(name: string, includeWindowScreenshot?: boolean | undefined | null, includeMonitorScreenshots?: boolean | undefined | null): Element
+  /**
+   * Activate an application by name.
+   * Returns nothing.
+   *
+   * @param {string} name - The name of the application to activate.
+   */
+  activateApplication(name: string): void
+  /**
+   * Click within element bounds at a specified position.
+   *
+   * This is useful for clicking on elements from UI tree, OCR, omniparser, gemini vision, or DOM
+   * without needing an element reference - just the bounds.
+   *
+   * Note: `restoreCursor` defaults to true here, whereas `ActionOptions.restoreCursor` defaults to false.
+   *
+   * @param {number} x - X coordinate of the bounds.
+   * @param {number} y - Y coordinate of the bounds.
+   * @param {number} width - Width of the bounds.
+   * @param {number} height - Height of the bounds.
+   * @param {number} [xPercentage=50] - X position within bounds as percentage (0-100). Defaults to 50 (center).
+   * @param {number} [yPercentage=50] - Y position within bounds as percentage (0-100). Defaults to 50 (center).
+   * @param {ClickType} [clickType='Left'] - Type of click: 'Left', 'Double', or 'Right'.
+   * @param {boolean} [restoreCursor=true] - If true, restore cursor to original position after clicking.
+   * @param {string} [process] - Process name for window screenshot capture. If provided, enables window screenshots.
+   * @param {boolean} [includeWindowScreenshot=true] - Whether to capture window screenshot (requires process).
+   * @param {boolean} [includeMonitorScreenshots=false] - Whether to capture monitor screenshots after clicking.
+   * @returns {ClickResult} Result with clicked coordinates and method details.
+   */
+  clickAtBounds(x: number, y: number, width: number, height: number, xPercentage?: number | undefined | null, yPercentage?: number | undefined | null, clickType?: ClickType | undefined | null, restoreCursor?: boolean | undefined | null, process?: string | undefined | null, includeWindowScreenshot?: boolean | undefined | null, includeMonitorScreenshots?: boolean | undefined | null): ClickResult
+  /**
+   * Click on an element by its index from the last tree/vision query.
+   *
+   * This looks up cached bounds from the appropriate cache based on visionType,
+   * then clicks at the specified position within those bounds.
+   *
+   * Note: `restoreCursor` defaults to true here, whereas `ActionOptions.restoreCursor` defaults to false.
+   * NOTE(review): behavior when no prior query populated the cache, or the index is unknown, is not documented here — confirm.
+   *
+   * @param {number} index - 1-based index from the tree/vision output (e.g., #1, #2).
+   * @param {VisionType} [visionType='UiTree'] - Source of the index: 'UiTree', 'Ocr', 'Omniparser', 'Gemini', or 'Dom'.
+   * @param {number} [xPercentage=50] - X position within bounds as percentage (0-100).
+   * @param {number} [yPercentage=50] - Y position within bounds as percentage (0-100).
+   * @param {ClickType} [clickType='Left'] - Type of click: 'Left', 'Double', or 'Right'.
+   * @param {boolean} [restoreCursor=true] - If true, restore cursor to original position after clicking.
+   * @param {string} [process] - Process name for window screenshot capture. If provided, enables window screenshots.
+   * @param {boolean} [includeWindowScreenshot=true] - Whether to capture window screenshot (requires process).
+   * @param {boolean} [includeMonitorScreenshots=false] - Whether to capture monitor screenshots after clicking.
+   * @returns {ClickResult} Result with clicked coordinates, element info, and method details.
+   */
+  clickByIndex(index: number, visionType?: VisionType | undefined | null, xPercentage?: number | undefined | null, yPercentage?: number | undefined | null, clickType?: ClickType | undefined | null, restoreCursor?: boolean | undefined | null, process?: string | undefined | null, includeWindowScreenshot?: boolean | undefined | null, includeMonitorScreenshots?: boolean | undefined | null): ClickResult
+  /**
+   * (async) Run a shell command.
+   * Both arguments are optional.
+   * NOTE(review): presumably only the command matching the current platform is executed — confirm.
+   *
+   * @param {string} [windowsCommand] - Command to run on Windows.
+   * @param {string} [unixCommand] - Command to run on Unix.
+   * @returns {Promise<CommandOutput>} The command output.
+   */
+  runCommand(windowsCommand?: string | undefined | null, unixCommand?: string | undefined | null): Promise<CommandOutput>
+  /**
+   * (async) Execute a shell command using GitHub Actions-style syntax.
+   *
+   * @param {string} command - The command to run (can be single or multi-line).
+   * @param {string} [shell] - Optional shell to use (defaults to PowerShell on Windows, bash on Unix).
+   * @param {string} [workingDirectory] - Optional working directory for the command.
+   * @returns {Promise<CommandOutput>} The command output (exitStatus, stdout, stderr).
+   */
+  run(command: string, shell?: string | undefined | null, workingDirectory?: string | undefined | null): Promise<CommandOutput>
+  /**
+   * (async) Perform OCR on an image file.
+   * Resolves to plain text only; `performOcrForProcess` is the variant that returns structured results with bounds.
+   *
+   * @param {string} imagePath - Path to the image file.
+   * @returns {Promise<string>} The extracted text.
+   */
+  ocrImagePath(imagePath: string): Promise<string>
+  /**
+   * (async) Perform OCR on a screenshot.
+   * Resolves to plain text only; `performOcrForProcess` is the variant that returns structured results with bounds.
+   *
+   * @param {ScreenshotResult} screenshot - The screenshot to process.
+   * @returns {Promise<string>} The extracted text.
+   */
+  ocrScreenshot(screenshot: ScreenshotResult): Promise<string>
+  /**
+   * (async) Perform OCR on a window by process name and return structured results with bounding boxes.
+   * Returns an OcrResult containing the OCR tree, formatted output, and index-to-bounds mapping
+   * for click targeting.
+   *
+   * On non-Windows platforms this method is provided by a stub with the same signature.
+   * NOTE(review): what the stub resolves or rejects with is not visible here — confirm before relying on it.
+   *
+   * @param {string} process - Process name to match (e.g., 'chrome', 'notepad').
+   * @param {boolean} [formatOutput=true] - Whether to generate formatted compact YAML output.
+   * @returns {Promise<OcrResult>} Complete OCR result with tree, formatted output, and bounds mapping.
+   */
+  performOcrForProcess(process: string, formatOutput?: boolean | undefined | null): Promise<OcrResult>
+  /**
+   * (async) Capture DOM elements from the current browser tab.
+   *
+   * Extracts visible DOM elements with their properties and screen coordinates.
+   * Uses JavaScript injection via Chrome extension to traverse the DOM tree.
+   * NOTE(review): presumably the same "ComputerUse Bridge extension" named on `TreeBuildConfig.includeBrowserDom` — confirm.
+   *
+   * @param {number} [maxElements=200] - Maximum number of elements to capture.
+   * @param {boolean} [formatOutput=true] - Whether to include formatted compact YAML output.
+   * @returns {Promise<BrowserDomResult>} DOM elements with bounds for click targeting.
+   */
+  captureBrowserDom(maxElements?: number | undefined | null, formatOutput?: boolean | undefined | null): Promise<BrowserDomResult>
+  /**
+   * (async) Get a clustered tree combining elements from multiple sources grouped by spatial proximity.
+   *
+   * Combines accessibility tree (UIA) elements with optional DOM, Omniparser, and Gemini Vision elements,
+   * clustering nearby elements together. Each element is prefixed with its source:
+   * - #u1, #u2... for UIA (accessibility tree)
+   * - #d1, #d2... for DOM (browser content)
+   * - #p1, #p2... for Omniparser (vision AI detection)
+   * - #g1, #g2... for Gemini Vision (AI element detection)
+   *
+   * NOTE(review): `ElementSource` also defines an OCR source (#o), but this signature has no OCR toggle —
+   * confirm whether OCR elements can appear in the result.
+   *
+   * @param {string} process - Process name to match (e.g., 'chrome', 'notepad').
+   * @param {number} [maxDomElements=100] - Maximum DOM elements to capture for browsers.
+   * @param {boolean} [includeOmniparser=false] - Whether to include Omniparser vision detection.
+   * @param {boolean} [includeGeminiVision=false] - Whether to include Gemini Vision AI detection.
+   * @returns {Promise<ClusteredFormattingResult>} Clustered tree with prefixed indices.
+   */
+  getClusteredTree(process: string, maxDomElements?: number | undefined | null, includeOmniparser?: boolean | undefined | null, includeGeminiVision?: boolean | undefined | null): Promise<ClusteredFormattingResult>
+  /**
+   * (async) Perform Gemini vision AI detection on a window by process name.
+   *
+   * Captures a screenshot and sends it to the Gemini vision backend for UI element detection.
+   * The backend URL is taken from the GEMINI_VISION_BACKEND_URL environment variable
+   * (defaults to https://app.mediar.ai/api/vision/parse).
+   *
+   * @param {string} process - Process name to match (e.g., 'chrome', 'notepad').
+   * @param {boolean} [formatOutput=true] - Whether to include formatted compact YAML output.
+   * @returns {Promise<GeminiVisionResult>} Detected UI elements with bounds for click targeting.
+   */
+  performGeminiVisionForProcess(process: string, formatOutput?: boolean | undefined | null): Promise<GeminiVisionResult>
+  /**
+   * (async) Perform Omniparser V2 detection on a window by process name.
+   *
+   * Captures a screenshot and sends it to the Omniparser backend for icon/field detection.
+   * The backend URL is taken from the OMNIPARSER_BACKEND_URL environment variable
+   * (defaults to https://app.mediar.ai/api/omniparser/parse).
+   *
+   * @param {string} process - Process name to match (e.g., 'chrome', 'notepad').
+   * @param {number} [imgsz=1920] - Icon detection image size (640-1920). Higher = better but slower.
+   * @param {boolean} [formatOutput=true] - Whether to include formatted compact YAML output.
+   * @returns {Promise<OmniparserResult>} Detected items with bounds for click targeting.
+   */
+  performOmniparserForProcess(process: string, imgsz?: number | undefined | null, formatOutput?: boolean | undefined | null): Promise<OmniparserResult>
+  /**
+   * (async) Get the currently focused browser window.
+   * NOTE(review): the outcome when the focused window is not a browser is not documented here — confirm.
+   *
+   * @returns {Promise<Element>} The current browser window element.
+   */
+  getCurrentBrowserWindow(): Promise<Element>
+  /**
+   * Create a locator for finding UI elements.
+   * The locator is not scoped to a process; `locatorForProcess` is documented as the recommended way to create locators.
+   *
+   * @param {string | Selector} selector - The selector.
+   * @returns {Locator} A locator for finding elements.
+   */
+  locator(selector: string | Selector): Locator
+  /**
+   * Create a process-scoped locator for finding UI elements.
+   * This is the recommended way to create locators - always scope to a specific process.
+   * Returns the locator directly (not a Promise).
+   *
+   * @param {string} process - Process name to scope the search (e.g., 'chrome', 'notepad').
+   * @param {string | Selector} selector - The selector to find within the process.
+   * @param {string} [windowSelector] - Optional window selector for additional filtering.
+   * @returns {Locator} A locator for finding elements within the process.
+   */
+  locatorForProcess(process: string, selector: string | Selector, windowSelector?: string | undefined | null): Locator
+  /**
+   * (async) Get the currently focused window.
+   * Unlike `focusedElement`, this resolves through a Promise.
+   *
+   * @returns {Promise<Element>} The current window element.
+   */
+  getCurrentWindow(): Promise<Element>
+  /**
+   * (async) Get the currently focused application.
+   *
+   * Takes no arguments; the result is delivered through the returned promise.
+   *
+   * @returns {Promise<Element>} Resolves to the current application element.
+   */
+  getCurrentApplication(): Promise<Element>
+  /**
+   * Get the currently focused element.
+   *
+   * Synchronous: unlike `getCurrentWindow` and `getCurrentApplication`, this returns the
+   * Element directly rather than a promise.
+   *
+   * @returns {Element} The focused element.
+   */
+  focusedElement(): Element
+  /**
+   * Open a URL in a browser.
+   *
+   * Synchronous: an Element is returned directly, not a promise.
+   *
+   * @param {string} url - The URL to open.
+   * @param {string} [browser] - The browser to use. Can be "Default", "Chrome", "Firefox", "Edge", "Brave", "Opera", "Vivaldi", or a custom browser path.
+   * @param {boolean} [includeWindowScreenshot=true] - Whether to capture window screenshot after opening
+   * @param {boolean} [includeMonitorScreenshots=false] - Whether to capture monitor screenshots after opening
+   * @returns {Element} An element for the opened page's browser. NOTE(review): presumably the browser
+   *   window element, as documented for `navigateBrowser` - confirm against the native implementation.
+   */
+  openUrl(url: string, browser?: string | undefined | null, includeWindowScreenshot?: boolean | undefined | null, includeMonitorScreenshots?: boolean | undefined | null): Element
+  /**
+   * Open a file with its default application.
+   *
+   * Synchronous and returns nothing; no handle to the opened application is provided.
+   *
+   * @param {string} filePath - Path to the file to open.
+   * @param {string} [process] - Process name for window screenshot capture. If provided, enables window screenshots.
+   * @param {boolean} [includeWindowScreenshot=true] - Whether to capture window screenshot (requires process).
+   * @param {boolean} [includeMonitorScreenshots=false] - Whether to capture monitor screenshots after opening.
+   * @returns {void}
+   */
+  openFile(filePath: string, process?: string | undefined | null, includeWindowScreenshot?: boolean | undefined | null, includeMonitorScreenshots?: boolean | undefined | null): void
+  /**
+   * Activate a browser window by title.
+   *
+   * Synchronous and returns nothing.
+   *
+   * @param {string} title - The window title to match. NOTE(review): whether matching is exact or
+   *   partial is not stated in this declaration - confirm before relying on either.
+   * @returns {void}
+   */
+  activateBrowserWindowByTitle(title: string): void
+  /**
+   * Get the UI tree for a window identified by process name and optional title.
+   *
+   * Synchronous: the tree is returned directly. Use `getWindowTreeResult` when formatted
+   * output or index-to-bounds mapping is also needed.
+   *
+   * @param {string} process - Process name to match (e.g., 'chrome', 'notepad').
+   * @param {string} [title] - Optional window title filter.
+   * @param {TreeBuildConfig} [config] - Optional configuration for tree building.
+   * @returns {UINode} Complete UI tree starting from the identified window.
+   */
+  getWindowTree(process: string, title?: string | undefined | null, config?: TreeBuildConfig | undefined | null): UINode
+  /**
+   * Get the UI tree with full result including formatting and bounds mapping.
+   *
+   * This is the recommended method for getting window trees when you need:
+   * - Formatted YAML output for LLM consumption
+   * - Index-to-bounds mapping for click targeting
+   * - Browser detection
+   *
+   * Synchronous counterpart of `getWindowTreeResultAsync`; the result is returned directly.
+   *
+   * @param {string} process - Process name to match (e.g., 'chrome', 'notepad').
+   * @param {string} [title] - Optional window title filter.
+   * @param {TreeBuildConfig} [config] - Configuration options:
+   *   - formatOutput: Enable formatted output (default: true if treeOutputFormat set)
+   *   - treeOutputFormat: 'CompactYaml' (default) or 'VerboseJson'
+   *   - treeFromSelector: Selector to start tree from (use getWindowTreeResultAsync for this)
+   *   - includeWindowScreenshot: Save window screenshot to executions dir (default: false)
+   *   - includeMonitorScreenshots: Save all monitor screenshots to executions dir (default: false)
+   * @returns {WindowTreeResult} Complete result with tree, formatted output, bounds mapping, and screenshot paths.
+   */
+  getWindowTreeResult(process: string, title?: string | undefined | null, config?: TreeBuildConfig | undefined | null): WindowTreeResult
+  /**
+   * (async) Get the UI tree with full result, supporting the `treeFromSelector` config option.
+   *
+   * Use this method when you need to scope the tree to a specific subtree using a selector.
+   *
+   * @param {string} process - Process name to match (e.g., 'chrome', 'notepad').
+   * @param {string} [title] - Optional window title filter.
+   * @param {TreeBuildConfig} [config] - Configuration options:
+   *   - formatOutput: Enable formatted output (default: true)
+   *   - treeOutputFormat: 'CompactYaml' (default) or 'VerboseJson'
+   *   - treeFromSelector: Selector to start tree from (e.g., "role:Dialog")
+   * @returns {Promise<WindowTreeResult>} Resolves to the complete result with tree, formatted output, and bounds mapping.
+   */
+  getWindowTreeResultAsync(process: string, title?: string | undefined | null, config?: TreeBuildConfig | undefined | null): Promise<WindowTreeResult>
+  /**
+   * (async) List all available monitors/displays.
+   *
+   * @returns {Promise<Array<Monitor>>} Resolves to the list of monitor information, one entry per monitor.
+   */
+  listMonitors(): Promise<Array<Monitor>>
+  /**
+   * (async) Get the primary monitor.
+   *
+   * @returns {Promise<Monitor>} Resolves to the primary monitor's information.
+   */
+  getPrimaryMonitor(): Promise<Monitor>
+  /**
+   * (async) Get the monitor containing the currently focused window.
+   *
+   * @returns {Promise<Monitor>} Resolves to the active monitor's information.
+   */
+  getActiveMonitor(): Promise<Monitor>
+  /**
+   * (async) Get a monitor by its ID.
+   *
+   * @param {string} id - The monitor ID to find.
+   * @returns {Promise<Monitor>} Resolves to the matching monitor's information.
+   *   NOTE(review): the declared type has no null case, so a missing ID presumably rejects - confirm.
+   */
+  getMonitorById(id: string): Promise<Monitor>
+  /**
+   * (async) Get a monitor by its name.
+   *
+   * @param {string} name - The monitor name to find.
+   * @returns {Promise<Monitor>} Resolves to the matching monitor's information.
+   *   NOTE(review): the declared type has no null case, so a missing name presumably rejects - confirm.
+   */
+  getMonitorByName(name: string): Promise<Monitor>
+  /**
+   * (async) Capture a screenshot of a specific monitor.
+   *
+   * @param {Monitor} monitor - The monitor to capture (e.g., one obtained from `listMonitors` or `getPrimaryMonitor`).
+   * @returns {Promise<ScreenshotResult>} Resolves to the screenshot data.
+   */
+  captureMonitor(monitor: Monitor): Promise<ScreenshotResult>
+  /**
+   * (async) Capture screenshots of all monitors.
+   *
+   * @returns {Promise<Array<MonitorScreenshotPair>>} Resolves to an array of monitor and screenshot pairs.
+   */
+  captureAllMonitors(): Promise<Array<MonitorScreenshotPair>>
+  /**
+   * Capture a screenshot of a window by process name.
+   *
+   * Finds the first window matching the given process name and captures its screenshot.
+   * Process name matching is case-insensitive and uses substring matching.
+   *
+   * Synchronous: the screenshot is returned directly. `captureScreenshot` is the
+   * promise-returning alternative that can also target an element or a whole monitor.
+   *
+   * @param {string} process - Process name to match (e.g., "chrome", "notepad", "code")
+   * @returns {ScreenshotResult} The screenshot data.
+   */
+  captureWindowByProcess(process: string): ScreenshotResult
+  /**
+   * (async) Captures a screenshot. Three modes:
+   * 1. Element mode: provide process + selector to capture specific element
+   * 2. Window mode: provide process only to capture entire window
+   * 3. Monitor mode: provide process + entireMonitor=true to capture the monitor where the window is located
+   *
+   * `process` is required in every mode; the remaining arguments select the mode.
+   *
+   * @param {string} process - Process name to match (e.g., "chrome", "notepad", "code")
+   * @param {string} [selector] - Optional selector to capture a specific element within the process
+   * @param {boolean} [entireMonitor=false] - If true, captures the entire monitor containing the window
+   * @param {number} [timeoutMs=10000] - Timeout in milliseconds for finding the element
+   * @returns {Promise<ScreenshotResult>} Resolves to the screenshot data.
+   */
+  captureScreenshot(process: string, selector?: string | undefined | null, entireMonitor?: boolean | undefined | null, timeoutMs?: number | undefined | null): Promise<ScreenshotResult>
+  /**
+   * Convert a screenshot to PNG bytes.
+   * Converts BGRA to RGBA and encodes as PNG format.
+   *
+   * @param {ScreenshotResult} screenshot - The screenshot to convert.
+   * @returns {Array<number>} PNG-encoded bytes as a plain array of numbers. The declared type is
+   *   not a Node.js Buffer; wrap the result with `Buffer.from(bytes)` if a Buffer is needed.
+   */
+  screenshotToPng(screenshot: ScreenshotResult): Array<number>
+  /**
+   * Convert a screenshot to PNG bytes with resizing.
+   * If the image exceeds maxDimension in either width or height,
+   * it will be resized while maintaining aspect ratio.
+   *
+   * @param {ScreenshotResult} screenshot - The screenshot to convert.
+   * @param {number} [maxDimension] - Maximum width or height. Defaults to 1920.
+   * @returns {Array<number>} PNG-encoded bytes (potentially resized) as a plain array of numbers.
+   *   The declared type is not a Node.js Buffer; wrap with `Buffer.from(bytes)` if a Buffer is needed.
+   */
+  screenshotToPngResized(screenshot: ScreenshotResult, maxDimension?: number | undefined | null): Array<number>
+  /**
+   * Convert a screenshot to base64-encoded PNG string.
+   * Useful for embedding in JSON responses or passing to LLMs.
+   *
+   * Synchronous: the string is returned directly.
+   *
+   * @param {ScreenshotResult} screenshot - The screenshot to convert.
+   * @returns {string} Base64-encoded PNG string.
+   */
+  screenshotToBase64Png(screenshot: ScreenshotResult): string
+  /**
+   * Convert a screenshot to base64-encoded PNG string with resizing.
+   * If the image exceeds maxDimension in either width or height,
+   * it will be resized while maintaining aspect ratio.
+   *
+   * Synchronous: the string is returned directly.
+   *
+   * @param {ScreenshotResult} screenshot - The screenshot to convert.
+   * @param {number} [maxDimension] - Maximum width or height. Defaults to 1920.
+   * @returns {string} Base64-encoded PNG string (potentially resized).
+   */
+  screenshotToBase64PngResized(screenshot: ScreenshotResult, maxDimension?: number | undefined | null): string
+  /**
+   * Get the dimensions a screenshot would have after resizing.
+   *
+   * Unlike `screenshotToPngResized` and `screenshotToBase64PngResized`, `maxDimension` is a
+   * required argument here; no default is applied.
+   *
+   * @param {ScreenshotResult} screenshot - The screenshot to check.
+   * @param {number} maxDimension - Maximum width or height.
+   * @returns {ResizedDimensions} Object with width and height after resize.
+   */
+  screenshotResizedDimensions(screenshot: ScreenshotResult, maxDimension: number): ResizedDimensions
+  /**
+   * (async) Get all window elements for a given application name.
+   *
+   * @param {string} name - The name of the application whose windows will be retrieved.
+   * @returns {Promise<Array<Element>>} Resolves to a list of window elements belonging to the application.
+   */
+  windowsForApplication(name: string): Promise<Array<Element>>
+  /**
+   * (async) Get the UI tree for all open applications in parallel.
+   *
+   * @returns {Promise<Array<UINode>>} Resolves to a list of UI trees, one per application.
+   */
+  getAllApplicationsTree(): Promise<Array<UINode>>
+  /**
+   * (async) Press a key globally.
+   *
+   * @param {string} key - The key to press (e.g., "Enter", "Ctrl+C", "F1").
+   * @param {string} [process] - Process name for window screenshot capture. If provided, enables window screenshots.
+   * @param {boolean} [includeWindowScreenshot=true] - Whether to capture window screenshot (requires process).
+   * @param {boolean} [includeMonitorScreenshots=false] - Whether to capture monitor screenshots after key press.
+   * @returns {Promise<void>} Resolves with no value.
+   */
+  pressKey(key: string, process?: string | undefined | null, includeWindowScreenshot?: boolean | undefined | null, includeMonitorScreenshots?: boolean | undefined | null): Promise<void>
+  /**
+   * (async) Execute JavaScript in a browser tab.
+   * Finds the browser window by process name and executes the script.
+   *
+   * Note the argument order: the script comes first, then the process name.
+   *
+   * @param {string} script - The JavaScript code to execute in browser context.
+   * @param {string} process - Process name to scope the browser window (e.g., 'chrome', 'msedge'). Required.
+   * @param {number} [timeoutMs=10000] - Timeout in milliseconds for finding the browser window.
+   * @returns {Promise<string>} Resolves to the result of script execution, as a string.
+   */
+  executeBrowserScript(script: string, process: string, timeoutMs?: number | undefined | null): Promise<string>
+  /**
+   * (async) Close a browser tab safely.
+   *
+   * The tab to close is identified by the fields of the single optional `options` object
+   * (these are object fields, not positional arguments):
+   * - tabId: Close a specific tab by its Chrome tab ID
+   * - url: Find and close a tab matching this URL (partial match supported)
+   * - title: Find and close a tab matching this title (case-insensitive partial match)
+   * - If none provided, closes the currently active tab
+   *
+   * Returns information about the closed tab for verification.
+   * Returns null if no browser extension is connected or tab couldn't be found.
+   *
+   * Safety:
+   * - Will NOT close protected browser pages (chrome://, edge://, about:, etc.)
+   * - Returns the closed tab's URL/title so you can verify the correct tab was closed
+   *
+   * @param {CloseTabOptions} [options] - Criteria selecting the tab to close (tabId, url and/or title,
+   *   as listed above). Omit it, or pass null/undefined, to close the currently active tab.
+   * @returns {Promise<CloseTabResult | null>} Info about closed tab, or null if no extension/tab found.
+   *
+   * @example
+   * // Close by URL
+   * const result = await desktop.closeTab({ url: "example.com" });
+   *
+   * @example
+   * // Close by title
+   * const result = await desktop.closeTab({ title: "My Page" });
+   *
+   * @example
+   * // Close active tab
+   * const result = await desktop.closeTab();
+   */
+  closeTab(options?: CloseTabOptions | undefined | null): Promise<CloseTabResult | null>
+  /**
+   * (async) Delay execution for a specified number of milliseconds.
+   * Useful for waiting between actions to ensure UI stability.
+   *
+   * The delay only takes effect for callers that await (or chain on) the returned promise.
+   *
+   * @param {number} delayMs - Delay in milliseconds.
+   * @returns {Promise<void>} Resolves with no value once the delay has elapsed.
+   */
+  delay(delayMs: number): Promise<void>
+  /**
+   * Navigate to a URL in a browser.
+   * This is the recommended method for browser navigation - more reliable than
+   * manually manipulating the address bar with keyboard/mouse actions.
+   *
+   * Synchronous: the declared return type is `Element`, not a promise, so the result
+   * must not be awaited as if it were asynchronous.
+   *
+   * @param {string} url - URL to navigate to
+   * @param {string} [browser] - Optional browser name ('Chrome', 'Firefox', 'Edge', 'Brave', 'Opera', 'Vivaldi', or 'Default'). May be omitted, undefined or null.
+   * @param {boolean} [includeWindowScreenshot=true] - Whether to capture window screenshot after navigation
+   * @param {boolean} [includeMonitorScreenshots=false] - Whether to capture monitor screenshots after navigation
+   * @returns {Element} The browser window element
+   */
+  navigateBrowser(url: string, browser?: string | undefined | null, includeWindowScreenshot?: boolean | undefined | null, includeMonitorScreenshots?: boolean | undefined | null): Element
+  /**
+   * (async) Set the zoom level to a specific percentage.
+   *
+   * NOTE(review): this declaration does not say which window or application the zoom applies to
+   * (the method takes no target argument) - confirm against the native implementation.
+   *
+   * @param {number} percentage - The zoom percentage (e.g., 100 for 100%, 150 for 150%, 50 for 50%).
+   * @returns {Promise<void>} Resolves with no value.
+   */
+  setZoom(percentage: number): Promise<void>
+  /**
+   * (async) Run Gemini Computer Use agentic loop.
+   *
+   * Provide a goal and target process, and this will autonomously take actions
+   * (click, type, scroll, etc.) until the goal is achieved or `maxSteps` is reached.
+   * Uses Gemini's vision model to analyze screenshots and decide actions.
+   *
+   * @param {string} process - Process name of the target application (e.g., "chrome", "notepad")
+   * @param {string} goal - What to achieve (e.g., "Open Notepad and type Hello World")
+   * @param {number} [maxSteps=20] - Maximum number of steps before stopping
+   * @param {function} [onStep] - Optional callback invoked after each step with step details.
+   *   It is error-first: called as `(err, step)` where `err` is `null` or an `Error` and `step`
+   *   is a `ComputerUseStep`. Its return value is ignored.
+   * @returns {Promise<ComputerUseResult>} Result with status, steps executed, and history
+   */
+  geminiComputerUse(process: string, goal: string, maxSteps?: number | undefined | null, onStep?: ((err: null | Error, step: ComputerUseStep) => void) | undefined): Promise<ComputerUseResult>
+  /**
+   * Stop all currently executing operations.
+   *
+   * This cancels the internal cancellation token, which will cause any
+   * operations that check `isCancelled()` to abort. After calling this,
+   * you should create a new Desktop instance to start fresh.
+   *
+   * Synchronous and returns nothing; use `isCancelled()` to observe the resulting state.
+   *
+   * @returns {void}
+   */
+  stopExecution(): void
+  /**
+   * Check if execution has been cancelled.
+   *
+   * Returns `true` if `stopExecution()` has been called.
+   * Long-running operations should periodically check this and abort if true.
+   *
+   * @returns {boolean} True once `stopExecution()` has been called.
+   */
+  isCancelled(): boolean
+  /**
+   * Stop all active highlight overlays globally.
+   *
+   * This finds and destroys all highlight overlay windows that were created
+   * by `element.highlight()`. Useful for cleaning up highlights without
+   * needing to track individual HighlightHandle objects.
+   *
+   * Synchronous: the count is returned directly.
+   *
+   * @returns {number} The number of highlights that were stopped.
+   */
+  stopHighlighting(): number
+  /**
+   * Show inspect overlay with indexed elements for visual debugging.
+   *
+   * Displays a transparent overlay window with colored rectangles around UI elements,
+   * showing their index numbers for click targeting. Use `hideInspectOverlay()` to remove.
+   *
+   * The signature is the same on every platform; on non-Windows platforms the native
+   * implementation is a stub.
+   *
+   * @param {InspectElement[]} elements - Array of elements to highlight with their bounds.
+   * @param {Bounds} windowBounds - The window bounds {x, y, width, height} to constrain the overlay.
+   * @param {OverlayDisplayMode} [displayMode='Index'] - What to show in labels: 'Index', 'Role', 'Name', etc.
+   * @returns {void}
+   */
+  showInspectOverlay(elements: Array<InspectElement>, windowBounds: Bounds, displayMode?: OverlayDisplayMode | undefined | null): void
+  /**
+   * Hide any active inspect overlay.
+   *
+   * This hides the visual overlay that was shown via `showInspectOverlay()`.
+   * Can be called from any thread.
+   *
+   * @returns {void}
+   */
+  hideInspectOverlay(): void
+  /**
+   * (async) Verify that an element matching the selector exists within the same application as the scope element.
+   *
+   * This is used for post-action verification - checking that an expected element appeared after
+   * performing an action (e.g., a success dialog after clicking submit).
+   *
+   * @param {Element} scopeElement - The element to get the application scope from (typically the element the action was performed on)
+   * @param {string} selector - The selector string to search for
+   * @param {number} [timeoutMs=2000] - How long to wait for the element to appear in milliseconds
+   * @returns {Promise<Element>} Resolves to the found element if verification passes
+   * @throws Error if the element is not found within the timeout. NOTE(review): since a promise is
+   *   returned, this presumably surfaces as a rejection rather than a synchronous throw - confirm.
+   */
+  verifyElementExists(scopeElement: Element, selector: string, timeoutMs?: number | undefined | null): Promise<Element>
+  /**
+   * (async) Verify that an element matching the selector does NOT exist within the same application as the scope element.
+   *
+   * This is used for post-action verification - checking that an element disappeared after
+   * performing an action (e.g., a modal dialog closed after clicking OK).
+   *
+   * @param {Element} scopeElement - The element to get the application scope from (typically the element the action was performed on)
+   * @param {string} selector - The selector string that should NOT be found
+   * @param {number} [timeoutMs=2000] - How long to wait/check that the element doesn't appear in milliseconds
+   * @returns {Promise<void>} Resolves with no value when verification passes
+   * @throws Error if the element IS found (meaning verification failed). NOTE(review): since a promise
+   *   is returned, this presumably surfaces as a rejection rather than a synchronous throw - confirm.
+   */
+  verifyElementNotExists(scopeElement: Element, selector: string, timeoutMs?: number | undefined | null): Promise<void>
+}
+/**
+ * A UI element in the accessibility tree.
+ *
+ * Almost every method here is synchronous and returns its result directly. Only `click` and
+ * `executeBrowserScript` are declared as returning promises.
+ */
+export declare class Element {
+  /**
+   * Get the element's ID.
+   *
+   * @returns {string | null} The element's ID, if available.
+   */
+  id(): string | null
+  /**
+   * Get the element's role.
+   *
+   * @returns {string} The element's role (e.g., "button", "textfield").
+   */
+  role(): string
+  /**
+   * Get all attributes of the element.
+   *
+   * @returns {UIElementAttributes} The element's attributes.
+   */
+  attributes(): UIElementAttributes
+  /**
+   * Get the element's name.
+   *
+   * @returns {string | null} The element's name, if available.
+   */
+  name(): string | null
+  /**
+   * Get children of this element.
+   *
+   * @returns {Array<Element>} List of child elements.
+   */
+  children(): Array<Element>
+  /**
+   * Get the parent element.
+   *
+   * @returns {Element | null} The parent element, if available.
+   */
+  parent(): Element | null
+  /**
+   * Get element bounds.
+   *
+   * @returns {Bounds} The element's bounds (x, y, width, height).
+   */
+  bounds(): Bounds
+  /**
+   * (async) Click on this element.
+   *
+   * Unlike `doubleClick`, this returns a promise and must be awaited to obtain the result.
+   *
+   * @param {ActionOptions} [options] - Options for the click action.
+   * @returns {Promise<ClickResult>} Result of the click operation.
+   */
+  click(options?: ActionOptions | undefined | null): Promise<ClickResult>
+  /**
+   * Double click on this element.
+   *
+   * Synchronous: the result is returned directly, not as a promise.
+   *
+   * @param {ActionOptions} [options] - Options for the double click action.
+   * @returns {ClickResult} Result of the click operation.
+   */
+  doubleClick(options?: ActionOptions | undefined | null): ClickResult
+  /**
+   * Right click on this element.
+   *
+   * @param {ActionOptions} [options] - Options for the right click action.
+   * @returns {void}
+   */
+  rightClick(options?: ActionOptions | undefined | null): void
+  /**
+   * Hover over this element.
+   *
+   * @param {ActionOptions} [options] - Optional action options.
+   * @returns {void}
+   */
+  hover(options?: ActionOptions | undefined | null): void
+  /**
+   * Check if element is visible.
+   *
+   * @returns {boolean} True if the element is visible.
+   */
+  isVisible(): boolean
+  /**
+   * Check if element is enabled.
+   *
+   * @returns {boolean} True if the element is enabled.
+   */
+  isEnabled(): boolean
+  /** Focus this element. */
+  focus(): void
+  /**
+   * Get text content of this element.
+   *
+   * @param {number} [maxDepth] - Maximum depth to search for text.
+   * @returns {string} The element's text content.
+   */
+  text(maxDepth?: number | undefined | null): string
+  /**
+   * Type text into this element.
+   *
+   * @param {string} text - The text to type.
+   * @param {TypeTextOptions} [options] - Options for typing.
+   * @returns {ActionResult} Result of the type operation.
+   */
+  typeText(text: string, options?: TypeTextOptions | undefined | null): ActionResult
+  /**
+   * Press a key while this element is focused.
+   *
+   * @param {string} key - The key to press.
+   * @param {ActionOptions} [options] - Options for the key press action.
+   * @returns {ActionResult} Result of the key press operation.
+   */
+  pressKey(key: string, options?: ActionOptions | undefined | null): ActionResult
+  /**
+   * Set value of this element.
+   *
+   * @param {string} value - The value to set.
+   * @param {ActionOptions} [options] - Options for the set value action.
+   * @returns {ActionResult} Result of the set value operation.
+   */
+  setValue(value: string, options?: ActionOptions | undefined | null): ActionResult
+  /**
+   * Perform a named action on this element.
+   *
+   * @param {string} action - The action to perform. NOTE(review): the set of accepted action
+   *   names is not listed in this declaration - confirm against the native implementation.
+   * @returns {void}
+   */
+  performAction(action: string): void
+  /**
+   * Invoke this element (triggers the default action).
+   * This is often more reliable than clicking for controls like radio buttons or menu items.
+   *
+   * @param {ActionOptions} [options] - Options for the invoke action.
+   * @returns {ActionResult} Result of the invoke operation.
+   */
+  invoke(options?: ActionOptions | undefined | null): ActionResult
+  /**
+   * Scroll the element in a given direction.
+   *
+   * @param {string} direction - The direction to scroll. NOTE(review): accepted direction strings
+   *   are not listed in this declaration - confirm before use.
+   * @param {number} amount - The amount to scroll. NOTE(review): the unit is not stated here.
+   * @param {ActionOptions} [options] - Options for the scroll action.
+   * @returns {ActionResult} Result of the scroll operation.
+   */
+  scroll(direction: string, amount: number, options?: ActionOptions | undefined | null): ActionResult
+  /** Activate the window containing this element. */
+  activateWindow(): void
+  /** Minimize the window containing this element. */
+  minimizeWindow(): void
+  /** Maximize the window containing this element. */
+  maximizeWindow(): void
+  /**
+   * Check if element is focused.
+   *
+   * @returns {boolean} True if the element is focused.
+   */
+  isFocused(): boolean
+  /**
+   * Check if element is keyboard focusable.
+   *
+   * @returns {boolean} True if the element can receive keyboard focus.
+   */
+  isKeyboardFocusable(): boolean
+  /**
+   * Drag mouse from start to end coordinates.
+   *
+   * @param {number} startX - Starting X coordinate.
+   * @param {number} startY - Starting Y coordinate.
+   * @param {number} endX - Ending X coordinate.
+   * @param {number} endY - Ending Y coordinate.
+   * @param {ActionOptions} [options] - Optional action options.
+   * @returns {void}
+   */
+  mouseDrag(startX: number, startY: number, endX: number, endY: number, options?: ActionOptions | undefined | null): void
+  /**
+   * Press and hold mouse at coordinates.
+   * Pair with `mouseRelease` to let go of the button.
+   *
+   * @param {number} x - X coordinate.
+   * @param {number} y - Y coordinate.
+   * @returns {void}
+   */
+  mouseClickAndHold(x: number, y: number): void
+  /**
+   * Move mouse to coordinates.
+   *
+   * @param {number} x - X coordinate.
+   * @param {number} y - Y coordinate.
+   * @returns {void}
+   */
+  mouseMove(x: number, y: number): void
+  /**
+   * Release mouse button.
+   *
+   * @param {ActionOptions} [options] - Optional action options.
+   * @returns {void}
+   */
+  mouseRelease(options?: ActionOptions | undefined | null): void
+  /**
+   * Create a locator from this element.
+   * Accepts either a selector string or a Selector object.
+   *
+   * @param {string | Selector} selector - The selector.
+   * @returns {Locator} A new locator for finding elements.
+   */
+  locator(selector: string | Selector): Locator
+  /**
+   * Get the containing application element.
+   *
+   * @returns {Element | null} The containing application element, if available.
+   */
+  application(): Element | null
+  /**
+   * Get the containing window element.
+   *
+   * @returns {Element | null} The containing window element, if available.
+   */
+  window(): Element | null
+  /**
+   * Highlights the element with a colored border and optional text overlay.
+   *
+   * All arguments are positional and optional; pass null or undefined to skip one.
+   *
+   * @param {number} [color] - Optional BGR color code (32-bit integer). Default: 0x0000FF (red)
+   * @param {number} [durationMs] - Optional duration in milliseconds.
+   * @param {string} [text] - Optional text to display. Text will be truncated to 10 characters.
+   * @param {TextPosition} [textPosition] - Optional position for the text overlay (default: Top)
+   * @param {FontStyle} [fontStyle] - Optional font styling for the text
+   * @returns {HighlightHandle} Handle that can be used to close the highlight early
+   */
+  highlight(color?: number | undefined | null, durationMs?: number | undefined | null, text?: string | undefined | null, textPosition?: TextPosition | undefined | null, fontStyle?: FontStyle | undefined | null): HighlightHandle
+  /**
+   * Capture a screenshot of this element.
+   *
+   * @returns {ScreenshotResult} The screenshot data containing image data and dimensions.
+   */
+  capture(): ScreenshotResult
+  /**
+   * Get the process ID of the application containing this element.
+   *
+   * @returns {number} The process ID.
+   */
+  processId(): number
+  /**
+   * Get the process name of the application containing this element.
+   *
+   * @returns {string} The process name (e.g., "chrome", "notepad").
+   */
+  processName(): string
+  /**
+   * Get a string representation of this element.
+   *
+   * @returns {string} A string describing the element. NOTE(review): the exact format is not
+   *   specified in this declaration; do not parse it.
+   */
+  toString(): string
+  /**
+   * Sets the transparency of the window.
+   *
+   * @param {number} percentage - The transparency percentage from 0 (completely transparent) to 100 (completely opaque).
+   * @returns {void}
+   */
+  setTransparency(percentage: number): void
+  /**
+   * Close the element if it's closable (like windows, applications).
+   * Does nothing for non-closable elements (like buttons, text, etc.).
+   *
+   * @returns {void}
+   */
+  close(): void
+  /**
+   * Get the monitor containing this element.
+   *
+   * @returns {Monitor} The monitor information for the display containing this element.
+   */
+  monitor(): Monitor
+  /**
+   * Scrolls the element into view within its window viewport.
+   * If the element is already visible, returns immediately.
+   *
+   * @returns {void}
+   */
+  scrollIntoView(): void
+  /**
+   * Selects an option in a dropdown or combobox by its visible text.
+   * `listOptions` returns the option strings available on this element.
+   *
+   * @param {string} optionName - The visible text of the option to select.
+   * @param {ActionOptions} [options] - Optional action options.
+   * @returns {void}
+   */
+  selectOption(optionName: string, options?: ActionOptions | undefined | null): void
+  /**
+   * Lists all available option strings from a dropdown or list box.
+   *
+   * @returns {Array<string>} List of available option strings.
+   */
+  listOptions(): Array<string>
+  /**
+   * Checks if a control (like a checkbox or toggle switch) is currently toggled on.
+   *
+   * @returns {boolean} True if the control is toggled on.
+   */
+  isToggled(): boolean
+  /**
+   * Sets the state of a toggleable control.
+   * It only performs an action if the control is not already in the desired state.
+   *
+   * @param {boolean} state - The desired toggle state.
+   * @param {ActionOptions} [options] - Optional action options.
+   * @returns {void}
+   */
+  setToggled(state: boolean, options?: ActionOptions | undefined | null): void
+  /**
+   * Checks if an element is selected (e.g., list item, tree node, tab).
+   *
+   * @returns {boolean} True if the element is selected, false otherwise.
+   */
+  isSelected(): boolean
+  /**
+   * Sets the selection state of a selectable item.
+   * Only performs an action if the element is not already in the desired state.
+   *
+   * @param {boolean} state - The desired selection state.
+   * @param {ActionOptions} [options] - Optional action options.
+   * @returns {void}
+   */
+  setSelected(state: boolean, options?: ActionOptions | undefined | null): void
+  /**
+   * Gets the current value from a range-based control like a slider or progress bar.
+   *
+   * @returns {number} The current value of the range control.
+   */
+  getRangeValue(): number
+  /**
+   * Sets the value of a range-based control like a slider.
+   *
+   * @param {number} value - The value to set.
+   * @param {ActionOptions} [options] - Optional action options.
+   * @returns {void}
+   */
+  setRangeValue(value: number, options?: ActionOptions | undefined | null): void
+  /**
+   * Gets the value attribute of an element (text inputs, combo boxes, etc.).
+   *
+   * @returns {string | null} The value attribute, or null if not available.
+   */
+  getValue(): string | null
+  /**
+   * (async) Execute JavaScript in web browser using dev tools console.
+   * Returns the result of the script execution as a string.
+   *
+   * @param {string} script - The JavaScript code to execute.
+   * @returns {Promise<string>} Resolves to the result of script execution.
+   */
+  executeBrowserScript(script: string): Promise<string>
+  /**
+   * Get the UI tree starting from this element.
+   * Returns a tree structure containing this element and all its descendants.
+   *
+   * @param {number} [maxDepth=100] - Maximum depth to traverse (default: 100).
+   * @returns {UINode} Tree structure with recursive children.
+   */
+  getTree(maxDepth?: number | undefined | null): UINode
+}
+/**
+ * Locator for finding UI elements by selector.
+ *
+ * Locators are created via `locator(...)` / `locatorForProcess(...)` on the desktop object, or via
+ * `Element.locator(...)`. The builder-style methods (`timeout`, `within`, `locator`) are
+ * synchronous and each returns a Locator; the lookup methods (`first`, `all`, `validate`,
+ * `waitFor`) return promises.
+ */
+export declare class Locator {
+  /**
+   * (async) Get the first matching element.
+   *
+   * @param {number} [timeoutMs] - Timeout in milliseconds (default: 10000).
+   * @returns {Promise<Element>} Resolves to the first matching element.
+   */
+  first(timeoutMs?: number | undefined | null): Promise<Element>
+  /**
+   * (async) Get all matching elements.
+   *
+   * Unlike `first`, the timeout argument is mandatory here.
+   *
+   * @param {number} timeoutMs - Timeout in milliseconds (required).
+   * @param {number} [depth] - Maximum depth to search.
+   * @returns {Promise<Array<Element>>} Resolves to the list of matching elements.
+   */
+  all(timeoutMs: number, depth?: number | undefined | null): Promise<Array<Element>>
+  /**
+   * Set a default timeout for this locator.
+   *
+   * @param {number} timeoutMs - Timeout in milliseconds.
+   * @returns {Locator} A new locator with the specified timeout.
+   */
+  timeout(timeoutMs: number): Locator
+  /**
+   * Set the root element for this locator.
+   *
+   * @param {Element} element - The root element.
+   * @returns {Locator} A new locator with the specified root element.
+   */
+  within(element: Element): Locator
+  /**
+   * Chain another selector.
+   * Accepts either a selector string or a Selector object.
+   *
+   * @param {string | Selector} selector - The selector.
+   * @returns {Locator} A new locator with the chained selector.
+   */
+  locator(selector: string | Selector): Locator
+  /**
+   * (async) Validate element existence without throwing an error.
+   *
+   * Use this instead of `first` when a missing element is an expected outcome that should be
+   * inspected through the result object rather than handled as an error.
+   *
+   * @param {number} timeoutMs - Timeout in milliseconds (required).
+   * @returns {Promise<ValidationResult>} Validation result with exists flag and optional element.
+   */
+  validate(timeoutMs: number): Promise<ValidationResult>
+  /**
+   * (async) Wait for an element to meet a specific condition.
+   *
+   * @param {string} condition - Condition to wait for: 'exists', 'visible', 'enabled', 'focused'
+   * @param {number} timeoutMs - Timeout in milliseconds (required).
+   * @returns {Promise<Element>} Resolves to the element when condition is met.
+   */
+  waitFor(condition: string, timeoutMs: number): Promise<Element>
+}
+/**
+ * Selector for locating UI elements. Provides a typed alternative to the
+ * string-based selector API.
+ *
+ * Selectors are created through the static factory methods; `chain` and
+ * `visible` are instance methods that return a `Selector`.
+ */
+export declare class Selector {
+  /**
+   * Create a selector that matches elements by their accessibility `name`.
+   *
+   * @param {string} name - The accessibility name to match.
+   */
+  static name(name: string): Selector
+  /**
+   * Create a selector that matches elements by role (and optionally name).
+   *
+   * @param {string} role - The role to match.
+   * @param {string} [name] - Optional name to match in addition to the role.
+   */
+  static role(role: string, name?: string | undefined | null): Selector
+  /**
+   * Create a selector that matches elements by accessibility `id`.
+   *
+   * @param {string} id - The accessibility id to match.
+   */
+  static id(id: string): Selector
+  /**
+   * Create a selector that matches elements by the text they display.
+   *
+   * @param {string} text - The displayed text to match.
+   */
+  static text(text: string): Selector
+  /**
+   * Create a selector from an XPath-like path string.
+   *
+   * @param {string} path - The XPath-like path.
+   */
+  static path(path: string): Selector
+  /**
+   * Create a selector that matches elements by a native automation id
+   * (e.g., AutomationID on Windows).
+   *
+   * @param {string} id - The native automation id to match.
+   */
+  static nativeId(id: string): Selector
+  /**
+   * Create a selector that matches elements by their class name.
+   *
+   * @param {string} name - The class name to match.
+   */
+  static className(name: string): Selector
+  /**
+   * Create a selector from an arbitrary attribute map.
+   *
+   * @param {Record<string, string>} attributes - Attribute names mapped to string values.
+   */
+  static attributes(attributes: Record<string, string>): Selector
+  /**
+   * Chain another selector onto this selector.
+   *
+   * @param {Selector} other - The selector to chain after this one.
+   * @returns {Selector} The chained selector.
+   */
+  chain(other: Selector): Selector
+  /**
+   * Filter by visibility.
+   *
+   * @param {boolean} isVisible - The visibility state to filter on.
+   * @returns {Selector} The filtered selector.
+   */
+  visible(isVisible: boolean): Selector
+  /**
+   * Create a selector that selects the nth element from matches.
+   * Non-negative values are 0-based from the start (0 = first, 1 = second).
+   * Negative values count from the end (-1 = last, -2 = second-to-last).
+   *
+   * @param {number} index - Position of the match to select.
+   */
+  static nth(index: number): Selector
+  /**
+   * Create a selector that matches elements having at least one descendant matching the inner selector.
+   * This is similar to Playwright's :has() pseudo-class.
+   *
+   * @param {Selector} innerSelector - Selector that a descendant must match.
+   */
+  static has(innerSelector: Selector): Selector
+  /**
+   * Create a selector that navigates to the parent element.
+   * This is similar to Playwright's .. syntax.
+   */
+  static parent(): Selector
+  /**
+   * Create a selector that scopes the search to a specific process.
+   * This is typically used as the first part of a chained selector.
+   * Example: `Selector.process("chrome").chain(Selector.role("Button", "Submit"))`
+   *
+   * @param {string} processName - Name of the process to scope to.
+   */
+  static process(processName: string): Selector
+  /**
+   * Create a selector that scopes the search to a specific window within a process.
+   * Typically chained after a process selector.
+   * Example: `Selector.process("notepad").chain(Selector.window("Untitled"))`
+   *
+   * @param {string} title - Title of the window to scope to.
+   */
+  static window(title: string): Selector
+}
+export declare class HighlightHandle { // NOTE(review): undocumented in the generated typings; presumably the handle for an active element highlight — confirm.
+  close(): void // Synchronous, takes no arguments; presumably dismisses the highlight — TODO confirm.
+}
+/**
+ * Window manager for controlling window states.
+ *
+ * Provides functionality for:
+ * - Enumerating windows with Z-order tracking
+ * - Bringing windows to front (bypassing Windows focus-stealing prevention)
+ * - Minimizing/maximizing windows
+ * - Capturing and restoring window states for workflows
+ *
+ * Every method other than the constructor returns a promise. Windows are
+ * identified by a numeric handle (`hwnd` / `targetHwnd`).
+ */
+export declare class WindowManager {
+  /** Create a new WindowManager instance. */
+  constructor()
+  /** Update the window cache with current window information. */
+  updateWindowCache(): Promise<void>
+  /**
+   * Get the topmost window for a process by name.
+   *
+   * @param {string} process - The process name.
+   * @returns {Promise<WindowInfo | null>} The window info, or `null`
+   *   (presumably when no window is found — confirm).
+   */
+  getTopmostWindowForProcess(process: string): Promise<WindowInfo | null>
+  /**
+   * Get the topmost window for a specific PID.
+   *
+   * @param {number} pid - The process ID.
+   * @returns {Promise<WindowInfo | null>} The window info, or `null`
+   *   (presumably when no window is found — confirm).
+   */
+  getTopmostWindowForPid(pid: number): Promise<WindowInfo | null>
+  /** Get all visible always-on-top windows. */
+  getAlwaysOnTopWindows(): Promise<Array<WindowInfo>>
+  /**
+   * Minimize only always-on-top windows (excluding the target).
+   *
+   * @param {number} targetHwnd - Handle of the target window to exclude.
+   * @returns {Promise<number>} The number of windows minimized.
+   */
+  minimizeAlwaysOnTopWindows(targetHwnd: number): Promise<number>
+  /**
+   * Minimize all visible windows except the target.
+   *
+   * @param {number} targetHwnd - Handle of the target window to exclude.
+   * @returns {Promise<number>} NOTE(review): presumably the number of windows
+   *   minimized, as for `minimizeAlwaysOnTopWindows` — confirm.
+   */
+  minimizeAllExcept(targetHwnd: number): Promise<number>
+  /**
+   * Maximize the window if it is not already maximized.
+   *
+   * @param {number} hwnd - Handle of the window.
+   * @returns {Promise<boolean>} True if the window was maximized by this call
+   *   (i.e. it wasn't already maximized).
+   */
+  maximizeIfNeeded(hwnd: number): Promise<boolean>
+  /**
+   * Bring a window to the front using the AttachThreadInput trick.
+   *
+   * This uses AttachThreadInput to bypass Windows' focus-stealing prevention.
+   *
+   * @param {number} hwnd - Handle of the window.
+   * @returns {Promise<boolean>} True if the window is now in the foreground.
+   */
+  bringWindowToFront(hwnd: number): Promise<boolean>
+  /**
+   * Minimize the window if it is not already minimized.
+   *
+   * @param {number} hwnd - Handle of the window.
+   * @returns {Promise<boolean>} True if the window was minimized by this call
+   *   (i.e. it wasn't already minimized).
+   */
+  minimizeIfNeeded(hwnd: number): Promise<boolean>
+  /** Capture the current state before a workflow. */
+  captureInitialState(): Promise<void>
+  /**
+   * Restore the windows that were minimized, and the target window, to their
+   * original state.
+   *
+   * @returns {Promise<number>} The number of windows restored.
+   */
+  restoreAllWindows(): Promise<number>
+  /** Clear the captured state. */
+  clearCapturedState(): Promise<void>
+  /**
+   * Check if a process is a UWP/Modern app.
+   *
+   * @param {number} pid - The process ID.
+   */
+  isUwpApp(pid: number): Promise<boolean>
+  /**
+   * Track a window as the target for restoration.
+   *
+   * @param {number} hwnd - Handle of the window to track.
+   */
+  setTargetWindow(hwnd: number): Promise<void>
+}