npm - testdriverai - Versions diffs - 7.3.9 → 7.3.10 - Mend

testdriverai 7.3.9 → 7.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/CHANGELOG.md +4 -0
package/agent/lib/commands.js +3 -2
package/docs/v7/assert.mdx +31 -0
package/docs/v7/client.mdx +29 -0
package/docs/v7/find.mdx +24 -0
package/mcp-server/dist/server.mjs +2 -2
package/mcp-server/src/server.ts +2 -2
package/package.json +1 -1
package/sdk.d.ts +19 -2
package/sdk.js +130 -59

package/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,7 @@
+## [7.3.10](https://github.com/testdriverai/testdriverai/compare/v7.3.9...v7.3.10) (2026-02-16)
 ## [7.3.9](https://github.com/testdriverai/testdriverai/compare/v7.3.8...v7.3.9) (2026-02-12)

package/agent/lib/commands.js CHANGED Viewed

@@ -226,8 +226,8 @@ const createCommands = (
     const assertTimestamp = Date.now();
     const assertStartTime = assertTimestamp;
-    // Extract cache options
-    const { threshold = -1, cacheKey, os, resolution } = options;
+    // Extract cache and AI options
+    const { threshold = -1, cacheKey, os, resolution, ai } = options;
     // Debug log cache settings
     emitter.emit(
@@ -243,6 +243,7 @@ const createCommands = (
       cacheKey,
       os,
       resolution,
+      ai,
     });
     const assertDuration = Date.now() - assertStartTime;

package/docs/v7/assert.mdx CHANGED Viewed

@@ -13,6 +13,7 @@ Make AI-powered assertions about the current screen state using natural language
 ```javascript
 await testdriver.assert(assertion)
+await testdriver.assert(assertion, options)
 ```
 ## Parameters
@@ -21,6 +22,36 @@ await testdriver.assert(assertion)
   Natural language description of what should be true
 </ParamField>
+<ParamField path="options" type="object">
+  Optional configuration
+  <Expandable title="properties">
+    <ParamField path="ai" type="object">
+      AI sampling configuration for this assert call (overrides global `ai` config from constructor).
+      <Expandable title="properties">
+        <ParamField path="temperature" type="number">
+          Controls randomness. `0` = deterministic, higher = more creative. Default: model default.
+        </ParamField>
+        <ParamField path="top" type="object">
+          Sampling parameters
+          <Expandable title="properties">
+            <ParamField path="p" type="number">
+              Top-P (nucleus sampling). Range: 0-1.
+            </ParamField>
+            <ParamField path="k" type="number">
+              Top-K sampling. `1` = most deterministic.
+            </ParamField>
+          </Expandable>
+        </ParamField>
+      </Expandable>
+    </ParamField>
+  </Expandable>
+</ParamField>
 ## Returns
 `Promise<boolean>` - `true` if assertion passes, throws error if assertion fails

package/docs/v7/client.mdx CHANGED Viewed

@@ -52,6 +52,30 @@ const testdriver = new TestDriver(apiKey, options)
     <ParamField path="environment" type="object">
       Additional environment variables to pass to the sandbox
     </ParamField>
+    <ParamField path="ai" type="object">
+      Global AI sampling configuration. Controls how the AI model generates responses for `find()` verification and `assert()` calls. Can be overridden per call.
+      <Expandable title="properties">
+        <ParamField path="temperature" type="number">
+          Controls randomness in AI responses. `0` = deterministic (best for verification), higher values = more creative. Default: `0` for find verification, model default for assert.
+        </ParamField>
+        <ParamField path="top" type="object">
+          Nucleus and top-k sampling parameters
+          <Expandable title="properties">
+            <ParamField path="p" type="number">
+              Top-P (nucleus sampling). Limits token choices to the smallest set whose cumulative probability exceeds P. Lower values = more focused responses. Range: 0-1.
+            </ParamField>
+            <ParamField path="k" type="number">
+              Top-K sampling. Limits token choices to the top K most likely tokens. `1` = always pick the most likely token. `0` = disabled (consider all tokens).
+            </ParamField>
+          </Expandable>
+        </ParamField>
+      </Expandable>
+    </ParamField>
   </Expandable>
 </ParamField>
@@ -68,6 +92,11 @@ const testdriver = new TestDriver({
   analytics: true
 });
+// With AI config for stricter verification
+const testdriver = new TestDriver({
+  ai: { temperature: 0, top: { p: 0.9, k: 40 } }
+});
 // Or pass API key explicitly
 const testdriver = new TestDriver('your-api-key', {
   os: 'windows'

package/docs/v7/find.mdx CHANGED Viewed

@@ -41,6 +41,30 @@ const element = await testdriver.find(description, options)
     <ParamField path="zoom" type="boolean" default={false}>
       Enable two-phase zoom mode for better precision in crowded UIs with many similar elements.
     </ParamField>
+    <ParamField path="ai" type="object">
+      AI sampling configuration for this find call (overrides global `ai` config from constructor).
+      <Expandable title="properties">
+        <ParamField path="temperature" type="number">
+          Controls randomness. `0` = deterministic. Default: `0` for find verification.
+        </ParamField>
+        <ParamField path="top" type="object">
+          Sampling parameters
+          <Expandable title="properties">
+            <ParamField path="p" type="number">
+              Top-P (nucleus sampling). Range: 0-1.
+            </ParamField>
+            <ParamField path="k" type="number">
+              Top-K sampling. `1` = most deterministic.
+            </ParamField>
+          </Expandable>
+        </ParamField>
+      </Expandable>
+    </ParamField>
   </Expandable>
 </ParamField>

package/mcp-server/dist/server.mjs CHANGED Viewed

@@ -401,8 +401,8 @@ Debug mode (connect to existing sandbox):
         const TestDriverSDK = (await import("../../sdk.js")).default;
         // Determine preview mode from environment variable
         // TD_PREVIEW can be "ide", "browser", or "none"
-        // Default to "none" for MCP server (headless) unless explicitly set
-        const previewMode = process.env.TD_PREVIEW || "none";
+        // Default to "ide" so the live preview shows within the IDE
+        const previewMode = process.env.TD_PREVIEW || "ide";
         logger.debug("session_start: Preview mode", { preview: previewMode });
         // Get IP from params or environment (for self-hosted instances)
         const instanceIp = params.ip || process.env.TD_IP;

package/mcp-server/src/server.ts CHANGED Viewed

@@ -509,8 +509,8 @@ Debug mode (connect to existing sandbox):
       // Determine preview mode from environment variable
       // TD_PREVIEW can be "ide", "browser", or "none"
-      // Default to "none" for MCP server (headless) unless explicitly set
-      const previewMode = process.env.TD_PREVIEW || "none";
+      // Default to "ide" so the live preview shows within the IDE
+      const previewMode = process.env.TD_PREVIEW || "ide";
       logger.debug("session_start: Preview mode", { preview: previewMode });
       // Get IP from params or environment (for self-hosted instances)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "testdriverai",
-  "version": "7.3.9",
+  "version": "7.3.10",
   "description": "Next generation autonomous AI agent for end-to-end testing of web & desktop",
   "main": "sdk.js",
   "types": "sdk.d.ts",

package/sdk.d.ts CHANGED Viewed

@@ -230,6 +230,8 @@ export interface TestDriverOptions {
   logging?: boolean;
   /** Enable/disable cache (default: true). Set to false to force regeneration on all find operations */
   cache?: boolean;
+  /** Global AI sampling configuration. Can be overridden per find() or assert() call. */
+  ai?: AIConfig;
   /** Cache threshold configuration for different methods */
   cacheThreshold?: {
     /** Threshold for find operations (default: 0.05 = 5% difference, 95% similarity) */
@@ -546,6 +548,19 @@ export interface FocusApplicationOptions {
   name: string;
 }
+/** AI sampling configuration for controlling model behavior */
+export interface AIConfig {
+  /** Temperature for AI sampling (0 = deterministic, higher = more creative). Default: 0 for find verification, model default for assert. */
+  temperature?: number;
+  /** Top-P and Top-K sampling parameters */
+  top?: {
+    /** Top-P (nucleus sampling). Controls diversity by limiting to top P probability mass. Range: 0-1. */
+    p?: number;
+    /** Top-K sampling. Limits choices to top K tokens. 1 = always pick most likely. 0 = disabled. */
+    k?: number;
+  };
+}
 /** Options for extract command */
 export interface ExtractOptions {
   /** What to extract */
@@ -564,6 +579,8 @@ export interface AssertOptions {
   os?: string;
   /** Screen resolution for cache partitioning */
   resolution?: string;
+  /** AI sampling configuration (overrides global ai config) */
+  ai?: AIConfig;
 }
 /** Options for exec command */
@@ -1028,7 +1045,7 @@ export default class TestDriverSDK {
   find(description: string, cacheThreshold?: number): ChainableElementPromise;
   find(
     description: string,
-    options?: { cacheThreshold?: number; cacheKey?: string; timeout?: number },
+    options?: { cacheThreshold?: number; cacheKey?: string; timeout?: number; ai?: AIConfig },
   ): ChainableElementPromise;
   /**
@@ -1267,7 +1284,7 @@ export default class TestDriverSDK {
    * // With custom threshold
    * await client.assert('the page loaded', { threshold: 0.01, cacheKey: 'login-test' });
    */
-  assert(assertion: string, options?: { threshold?: number; cacheKey?: string; os?: string; resolution?: string }): Promise<boolean>;
+  assert(assertion: string, options?: { threshold?: number; cacheKey?: string; os?: string; resolution?: string; ai?: AIConfig }): Promise<boolean>;
   /**
    * Extract information from the screen using AI

package/sdk.js CHANGED Viewed

@@ -430,8 +430,9 @@ class Element {
   /**
    * Find the element on screen
    * @param {string} [newDescription] - Optional new description to search for
-   * @param {Object} [options] - Optional options object with cacheThreshold, cacheKey, and/or timeout
+   * @param {Object} [options] - Optional options object with cache thresholds, cacheKey, and/or timeout
    * @param {number} [options.timeout] - Max time in ms to poll for element (polls every 5 seconds)
+   * @param {Object} [options.cache] - Cache configuration { thresholds: { screen, element } }
    * @returns {Promise<Element>} This element instance
    */
   async find(newDescription, options) {
@@ -468,10 +469,12 @@ class Element {
         this._screenshot = screenshot;
       }
-      // Handle options - can be a number (cacheThreshold) or object with cacheKey/cacheThreshold
+      // Handle options - can be a number (cacheThreshold) or object with cacheKey/cacheThreshold/cache
       let cacheKey = null;
       let cacheThreshold = null;
+      let perCommandThresholds = null; // Per-command { screen, element } override
       let zoom = false; // Default to disabled, enable with zoom: true
+      let perCommandAi = null; // Per-command AI config override
       if (typeof options === "number") {
         // Legacy: options is just a number threshold
@@ -482,6 +485,10 @@ class Element {
         cacheThreshold = options.cacheThreshold ?? null;
         // zoom defaults to false unless explicitly set to true
         zoom = options.zoom === true;
+        // Per-command cache thresholds: { cache: { thresholds: { screen: 0.1, element: 0.2 } } }
+        if (typeof options.cache === "object" && options.cache?.thresholds) {
+          perCommandThresholds = options.cache.thresholds;
+        }
       }
       // Use default cacheKey from SDK constructor if not provided in find() options
@@ -499,19 +506,25 @@ class Element {
       // - If cacheKey is provided, enable cache with threshold
       // - If no cacheKey, disable cache
       let threshold;
+      let elementSimilarity;
       if (this.sdk._cacheExplicitlyDisabled) {
         // Cache explicitly disabled via cache: false option or TD_NO_CACHE env
         threshold = -1;
+        elementSimilarity = -1;
         cacheKey = null; // Clear any cacheKey to ensure cache is truly disabled
       } else if (cacheKey) {
         // cacheKey provided - enable cache with threshold
-        threshold = cacheThreshold ?? this.sdk.cacheThresholds?.find ?? 0.01;
+        // Per-command thresholds > legacy cacheThreshold > global config
+        threshold = perCommandThresholds?.screen ?? cacheThreshold ?? this.sdk.cacheConfig?.thresholds?.find?.screen ?? 0.01;
+        elementSimilarity = perCommandThresholds?.element ?? this.sdk.cacheConfig?.thresholds?.find?.element ?? 0.8;
       } else if (cacheThreshold !== null) {
         // Explicit threshold provided without cacheKey
-        threshold = cacheThreshold;
+        threshold = perCommandThresholds?.screen ?? cacheThreshold;
+        elementSimilarity = perCommandThresholds?.element ?? this.sdk.cacheConfig?.thresholds?.find?.element ?? 0.8;
       } else {
         // No cacheKey, no explicit threshold - disable cache
         threshold = -1;
+        elementSimilarity = -1;
       }
       // Store the threshold for debugging
@@ -536,10 +549,16 @@ class Element {
         element: description,
         image: screenshot,
         threshold: threshold,
+        elementSimilarity: elementSimilarity,
         cacheKey: cacheKey,
         os: this.sdk.os,
         resolution: this.sdk.resolution,
         zoom: zoom,
+        ai: {
+          ...this.sdk.aiConfig,
+          ...(perCommandAi || {}),
+          top: { ...this.sdk.aiConfig?.top, ...(perCommandAi?.top || {}) },
+        },
       });
       const duration = Date.now() - startTime;
@@ -736,6 +755,9 @@ class Element {
       cacheHit: debugInfo.cacheHit,
       selectorId: this._response?.selector,
       consoleUrl: consoleUrl,
+      validated: response.validated ?? null,
+      validationConfidence: response.validationConfidence ?? null,
+      coordsUpdated: response.coordsUpdated ?? null,
     };
     if (!debugInfo.cacheHit) {
       meta.confidence = debugInfo.confidence;
@@ -1441,15 +1463,49 @@ class TestDriverSDK {
         findAll: -1,
         assert: -1,
       };
+      this.cacheConfig = {
+        enabled: false,
+        thresholds: {
+          find: { screen: -1, element: -1 },
+          assert: -1,
+        },
+      };
     } else {
-      // Cache enabled by default when cacheKey is provided
+      // Support cache object format: { cache: { thresholds: { find: { screen: 0.01, element: 0.8 }, assert: 0.05 } } }
+      const cacheOpts = typeof options.cache === "object" ? options.cache : {};
+      const thresholds = cacheOpts.thresholds || {};
+      const findThresholds = typeof thresholds.find === "object" ? thresholds.find : {};
+      this.cacheConfig = {
+        enabled: cacheOpts.enabled !== false,
+        thresholds: {
+          find: {
+            screen: findThresholds.screen ?? 0.01, // Default: 1% pixel diff allowed
+            element: findThresholds.element ?? 0.8, // Default: 80% OpenCV correlation
+          },
+          assert: thresholds.assert ?? 0.05, // Default: 5% pixel diff for assertions
+        },
+      };
+      // Legacy cacheThresholds - keep for backwards compatibility
       this.cacheThresholds = {
-        find: options.cacheThreshold?.find ?? 0.01, // Default: 1% threshold
-        findAll: options.cacheThreshold?.findAll ?? 0.01,
-        assert: options.cacheThreshold?.assert ?? 0.05, // Default: 5% threshold for assertions
+        find: options.cacheThreshold?.find ?? this.cacheConfig.thresholds.find.screen,
+        findAll: options.cacheThreshold?.findAll ?? this.cacheConfig.thresholds.find.screen,
+        assert: options.cacheThreshold?.assert ?? this.cacheConfig.thresholds.assert,
       };
     }
+    // AI sampling configuration
+    // Supports: { ai: { temperature: 0, top: { p: 1, k: 0 } } }
+    // Can be overridden per find() or assert() call
+    this.aiConfig = typeof options.ai === "object" ? {
+      temperature: options.ai.temperature,
+      top: {
+        p: options.ai.top?.p,
+        k: options.ai.top?.k,
+      },
+    } : {};
     // Redraw configuration
     // Supports both:
     //   - redraw: { enabled: true, diffThreshold: 0.1, screenRedraw: true, networkMonitor: true }
@@ -2791,7 +2847,7 @@ CAPTCHA_SOLVER_EOF`,
    * Automatically locates the element and returns it
    *
    * @param {string} description - Description of the element to find
-   * @param {number | Object} [options] - Cache options: number for threshold, or object with {cacheKey, cacheThreshold}
+   * @param {number | Object} [options] - Cache options: number for threshold, or object with {cacheKey, cache: { thresholds: { screen, element } }}
    * @returns {Promise<Element> & ChainableElement} Element instance that has been located, with chainable methods
    *
    * @example
@@ -2880,7 +2936,7 @@ CAPTCHA_SOLVER_EOF`,
    * Automatically locates all matching elements and returns them as an array
    *
    * @param {string} description - Description of the elements to find
-   * @param {number | Object} [options] - Cache options: number for threshold, or object with {cacheKey, cacheThreshold}
+   * @param {number | Object} [options] - Cache options: number for threshold, or object with {cacheKey, cache: { thresholds: { screen } }}
    * @returns {Promise<Element[]>} Array of Element instances that have been located
    *
    * @example
@@ -2936,9 +2992,10 @@ CAPTCHA_SOLVER_EOF`,
     try {
       const screenshot = await this.system.captureScreenBase64();
-      // Handle options - can be a number (cacheThreshold) or object with cacheKey/cacheThreshold
+      // Handle options - can be a number (cacheThreshold) or object with cacheKey/cacheThreshold/cache
       let cacheKey = null;
       let cacheThreshold = null;
+      let perCommandThresholds = null; // Per-command { screen } override (findAll has no element threshold)
       if (typeof options === "number") {
         // Legacy: options is just a number threshold
@@ -2947,6 +3004,10 @@ CAPTCHA_SOLVER_EOF`,
         // New: options is an object with cacheKey and/or cacheThreshold
         cacheKey = options.cacheKey || null;
         cacheThreshold = options.cacheThreshold ?? null;
+        // Per-command cache thresholds: { cache: { thresholds: { screen: 0.1 } } }
+        if (typeof options.cache === "object" && options.cache?.thresholds) {
+          perCommandThresholds = options.cache.thresholds;
+        }
       }
       // Use default cacheKey from SDK constructor if not provided in findAll() options
@@ -2969,11 +3030,11 @@ CAPTCHA_SOLVER_EOF`,
         threshold = -1;
         cacheKey = null; // Clear any cacheKey to ensure cache is truly disabled
       } else if (cacheKey) {
-        // cacheKey provided - enable cache with threshold
-        threshold = cacheThreshold ?? this.cacheThresholds?.findAll ?? 0.01;
+        // cacheKey provided - enable cache with threshold (findAll only uses screen, no element)
+        threshold = perCommandThresholds?.screen ?? cacheThreshold ?? this.cacheConfig?.thresholds?.find?.screen ?? 0.01;
       } else if (cacheThreshold !== null) {
         // Explicit threshold provided without cacheKey
-        threshold = cacheThreshold;
+        threshold = perCommandThresholds?.screen ?? cacheThreshold;
       } else {
         // No cacheKey, no explicit threshold - disable cache
         threshold = -1;
@@ -2994,7 +3055,7 @@ CAPTCHA_SOLVER_EOF`,
       }
       const response = await this.apiClient.req(
-        "/api/v7.0.0/testdriver-agent/testdriver-find-all",
+        "/api/v7.0.0/testdriver/find-all",
         {
           session: this.getSessionId(),
           element: description,
@@ -3010,7 +3071,7 @@ CAPTCHA_SOLVER_EOF`,
       if (response && response.elements && response.elements.length > 0) {
         // Single log at the end - found elements
-        const formattedMessage = formatter.formatFindAllSingleLine(
+        const formattedMessage = formatter.formatElementsFound(
           description,
           response.elements.length,
           {
@@ -3093,7 +3154,7 @@ CAPTCHA_SOLVER_EOF`,
         const duration = Date.now() - startTime;
         // Single log at the end - no elements found
-        const formattedMessage = formatter.formatFindAllSingleLine(
+        const formattedMessage = formatter.formatElementsFound(
           description,
           0,
           {
@@ -3139,7 +3200,7 @@ CAPTCHA_SOLVER_EOF`,
       const duration = Date.now() - startTime;
       // Single log at the end - error
-      const formattedMessage = formatter.formatFindAllSingleLine(
+      const formattedMessage = formatter.formatElementsFound(
         description,
         0,
         {
@@ -3334,16 +3395,30 @@ CAPTCHA_SOLVER_EOF`,
           let result;
           // Special handling for assert to inject SDK options (cacheKey, os, resolution, threshold)
           // similar to how find() handles these in the Element class
+          // Note: assert does NOT use elementSimilarity (template matching not relevant for assertions)
           if (commandName === 'assert') {
             const assertion = args[0];
             const userOptions = args[1] || {};
+            // Support per-command cache threshold override: { cache: { threshold: 0.05 } }
+            const perCommandThreshold = typeof userOptions.cache === "object"
+              ? userOptions.cache.threshold
+              : undefined;
             // Merge SDK defaults with user options (user options take precedence)
             const mergedOptions = {
               cacheKey: userOptions.cacheKey ?? sdk.options.cacheKey,
               os: userOptions.os ?? sdk.os,
               resolution: userOptions.resolution ?? sdk.resolution,
-              threshold: userOptions.threshold !== undefined ? userOptions.threshold : (sdk.cacheThresholds?.assert ?? -1),
+              threshold: perCommandThreshold ?? userOptions.threshold ?? (sdk.cacheConfig?.thresholds?.assert ?? sdk.cacheThresholds?.assert ?? 0.05),
+              ai: {
+                ...sdk.aiConfig,
+                ...(typeof userOptions.ai === "object" ? userOptions.ai : {}),
+                top: {
+                  ...sdk.aiConfig?.top,
+                  ...(typeof userOptions.ai === "object" ? userOptions.ai?.top : {}),
+                },
+              },
             };
             // Note: commands.assert takes (assertion, options), shouldThrow is determined internally
@@ -3451,74 +3526,70 @@ CAPTCHA_SOLVER_EOF`,
   }
   /**
-   * Extract all visible text from the current screen using OCR (Tesseract)
-   * Returns structured data with text content, bounding boxes, and confidence scores
+   * Parse the current screen using OmniParser v2 to detect all UI elements
+   * Returns structured data with element types, bounding boxes, and content
+   * Requires enterprise or self-hosted plan.
    *
-   * @returns {Promise<OCRResult>} OCR extraction result
+   * @returns {Promise<ParseResult>} Parsed screen elements
    *
-   * @typedef {Object} OCRResult
-   * @property {OCRWord[]} words - Array of words with positions and confidence
-   * @property {string} fullText - All extracted text concatenated
-   * @property {number} confidence - Overall OCR confidence (0-100)
+   * @typedef {Object} ParseResult
+   * @property {ParsedElement[]} elements - Array of detected UI elements
+   * @property {string} annotatedImageUrl - URL of the annotated screenshot
    * @property {number} imageWidth - Width of the analyzed image
    * @property {number} imageHeight - Height of the analyzed image
    *
-   * @typedef {Object} OCRWord
-   * @property {string} content - The text content of the word
-   * @property {number} confidence - Confidence score (0-100)
-   * @property {Object} bbox - Bounding box coordinates
+   * @typedef {Object} ParsedElement
+   * @property {number} index - Element index
+   * @property {string} type - Element type (e.g. "text", "icon", "button")
+   * @property {string} content - Text content or description
+   * @property {string} interactivity - Interactivity level (e.g. "clickable", "non-interactive")
+   * @property {Object} bbox - Bounding box in pixel coordinates
    * @property {number} bbox.x0 - Left edge X coordinate
    * @property {number} bbox.y0 - Top edge Y coordinate
    * @property {number} bbox.x1 - Right edge X coordinate
    * @property {number} bbox.y1 - Bottom edge Y coordinate
+   * @property {Object} boundingBox - Bounding box as {left, top, width, height}
+   * @property {number} boundingBox.left - Left position
+   * @property {number} boundingBox.top - Top position
+   * @property {number} boundingBox.width - Element width
+   * @property {number} boundingBox.height - Element height
    *
    * @example
-   * // Get all text on screen
-   * const result = await testdriver.ocr();
-   * console.log(result.fullText);
-   * // "Welcome to TestDriver Sign In Email Password Submit"
+   * // Get all elements on screen
+   * const result = await testdriver.parse();
+   * console.log(`Found ${result.elements.length} elements`);
    *
    * @example
-   * // Find words matching a pattern
-   * const result = await testdriver.ocr();
-   * const buttons = result.words.filter(w =>
-   *   w.content.toLowerCase().includes('button')
-   * );
+   * // Find clickable elements
+   * const result = await testdriver.parse();
+   * const clickable = result.elements.filter(e => e.interactivity === 'clickable');
    *
    * @example
-   * // Get word positions for clicking
-   * const result = await testdriver.ocr();
-   * const submitWord = result.words.find(w => w.content === 'Submit');
-   * if (submitWord) {
-   *   // Calculate center of the word
-   *   const x = (submitWord.bbox.x0 + submitWord.bbox.x1) / 2;
-   *   const y = (submitWord.bbox.y0 + submitWord.bbox.y1) / 2;
-   *   await testdriver.click({ x, y });
-   * }
-   *
-   * @example
-   * // Check if specific text exists on screen
-   * const result = await testdriver.ocr();
-   * const hasError = result.words.some(w =>
-   *   w.content.toLowerCase().includes('error')
-   * );
+   * // Find text content
+   * const result = await testdriver.parse();
+   * const textElements = result.elements.filter(e => e.type === 'text');
+   * textElements.forEach(e => console.log(e.content));
    */
-  async ocr() {
+  async parse() {
     this._ensureConnected();
     const { events } = require("./agent/events.js");
-    this.emitter.emit(events.log.log, "🔍 Running OCR text extraction...");
+    this.emitter.emit(events.log.log, "🔍 Running OmniParser screen analysis...");
     const screenshot = await this.system.captureScreenBase64();
-    const response = await this.apiClient.req("ocr", {
+    const response = await this.apiClient.req("parse", {
       session: this.getSessionId(),
       image: screenshot,
     });
+    if (response.error) {
+      throw new Error(response.error);
+    }
     this.emitter.emit(
       events.log.log,
-      `✅ OCR complete: ${response.words?.length || 0} words extracted`,
+      `✅ Parse complete: ${response.elements?.length || 0} elements detected`,
     );
     return response;