npm - testdriverai - Versions diffs - 7.2.92 → 7.3.2 - Mend

testdriverai 7.2.92 → 7.3.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (19)

package/agent/index.js +53 -13
package/agent/lib/analytics.js +4 -1
package/agent/lib/commands.js +252 -252
package/agent/lib/sandbox.js +1 -1
package/ai/skills/testdriver:aws-setup/SKILL.md +4 -4
package/ai/skills/testdriver:captcha/SKILL.md +1 -1
package/ai/skills/testdriver:ci-cd/SKILL.md +23 -23
package/ai/skills/testdriver:cloud/SKILL.md +1 -1
package/ai/skills/testdriver:customizing-devices/SKILL.md +5 -5
package/ai/skills/testdriver:running-tests/SKILL.md +11 -11
package/ai/skills/testdriver:secrets/SKILL.md +1 -1
package/ai/skills/testdriver:testdriver/SKILL.md +5 -5
package/ai/skills/testdriver:variables/SKILL.md +3 -3
package/debugger/index.html +0 -36
package/lib/vitest/hooks.mjs +3 -0
package/package.json +1 -1
package/sdk-log-formatter.js +8 -1
package/sdk.d.ts +86 -2
package/sdk.js +126 -24

package/agent/lib/sandbox.js CHANGED Viewed

@@ -144,7 +144,7 @@ const createSandbox = (emitter, analytics, sessionInstance) => {
         if (reply.traceId) {
           this.traceId = reply.traceId;
           logger.log('');
-          logger.log(`🔗 View Trace:`);
+          logger.log(`🔗 Trace Report (Share When Reporting Bugs):`);
           logger.log(`https://testdriver.sentry.io/explore/traces/trace/${reply.traceId}`);
         }

package/ai/skills/testdriver:aws-setup/SKILL.md CHANGED Viewed

@@ -49,7 +49,7 @@ That's it! No manual instance management needed.
     ```bash
     TD_OS=windows AWS_REGION=us-east-2 \
       AWS_LAUNCH_TEMPLATE_ID=lt-xxx AMI_ID=ami-xxx \
-      npx vitest run
+      vitest run
     ```
   </Step>
 </Steps>
@@ -219,7 +219,7 @@ TD_OS=windows \
 AWS_REGION=us-east-2 \
 AWS_LAUNCH_TEMPLATE_ID=lt-xxx \
 AMI_ID=ami-0504bf50fad62f312 \
-npx vitest run
+vitest run
 ```
 <Note>
@@ -256,7 +256,7 @@ jobs:
         run: npm ci
       - name: Run Windows tests with self-hosted instances
-        run: npx vitest run examples/*.test.mjs
+        run: vitest run examples/*.test.mjs
         env:
           TD_API_KEY: ${{ secrets.TD_API_KEY }}
           TD_OS: windows
@@ -306,7 +306,7 @@ For complete production examples, see:
 If you already have a running instance, you can skip automatic spawning by providing `TD_IP`:
 ```bash
-TD_OS=windows TD_IP=1.2.3.4 npx vitest run
+TD_OS=windows TD_IP=1.2.3.4 vitest run
 ```
 The `setup-aws` hook will detect `TD_IP` is already set and skip spawning a new instance.

package/ai/skills/testdriver:captcha/SKILL.md CHANGED Viewed

@@ -118,7 +118,7 @@ Add the key to your repository secrets and expose it in your workflow:
 ```yaml
 - name: Run Tests
-  run: npx vitest run
+  run: vitest run
   env:
     TWOCAPTCHA_API_KEY: ${{ secrets.TWOCAPTCHA_API_KEY }}
 ```

package/ai/skills/testdriver:ci-cd/SKILL.md CHANGED Viewed

@@ -65,7 +65,7 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
           - name: Run TestDriver tests
             env:
               TD_API_KEY: ${{ secrets.TD_API_KEY }}
-            run: npx vitest --run
+            run: vitest --run
     ```
     ### Parallel Execution
@@ -95,7 +95,7 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
           - name: Run tests (shard ${{ matrix.shard }}/4)
             env:
               TD_API_KEY: ${{ secrets.TD_API_KEY }}
-            run: npx vitest --run --shard=${{ matrix.shard }}/4
+            run: vitest --run --shard=${{ matrix.shard }}/4
     ```
     ### Multi-Platform Testing
@@ -124,7 +124,7 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
             env:
               TD_API_KEY: ${{ secrets.TD_API_KEY }}
               TD_OS: ${{ matrix.td-os }}
-            run: npx vitest --run
+            run: vitest --run
     ```
   </Tab>
@@ -153,7 +153,7 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
           - node_modules/
       script:
         - npm ci
-        - npx vitest --run
+        - vitest --run
       variables:
         TD_API_KEY: $TD_API_KEY
     ```
@@ -178,22 +178,22 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
     testdriver-shard-1:
       extends: .testdriver-base
       script:
-        - npx vitest --run --shard=1/4
+        - vitest --run --shard=1/4
     testdriver-shard-2:
       extends: .testdriver-base
       script:
-        - npx vitest --run --shard=2/4
+        - vitest --run --shard=2/4
     testdriver-shard-3:
       extends: .testdriver-base
       script:
-        - npx vitest --run --shard=3/4
+        - vitest --run --shard=3/4
     testdriver-shard-4:
       extends: .testdriver-base
       script:
-        - npx vitest --run --shard=4/4
+        - vitest --run --shard=4/4
     ```
     ### Multi-Platform Testing
@@ -218,14 +218,14 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
       variables:
         TD_OS: linux
       script:
-        - npx vitest --run
+        - vitest --run
     testdriver-windows:
       extends: .testdriver-base
       variables:
         TD_OS: windows
       script:
-        - npx vitest --run
+        - vitest --run
     ```
   </Tab>
@@ -260,7 +260,7 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
                 - node_modules
           - run:
               name: Run TestDriver tests
-              command: npx vitest --run
+              command: vitest --run
               environment:
                 TD_API_KEY: ${TD_API_KEY}
@@ -293,7 +293,7 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
           - run:
               name: Run TestDriver tests
               command: |
-                npx vitest --run --shard=$((CIRCLE_NODE_INDEX + 1))/$CIRCLE_NODE_TOTAL
+                vitest --run --shard=$((CIRCLE_NODE_INDEX + 1))/$CIRCLE_NODE_TOTAL
               environment:
                 TD_API_KEY: ${TD_API_KEY}
@@ -320,7 +320,7 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
           - run: npm ci
           - run:
               name: Run TestDriver tests on << parameters.td-os >>
-              command: npx vitest --run
+              command: vitest --run
               environment:
                 TD_API_KEY: ${TD_API_KEY}
                 TD_OS: << parameters.td-os >>
@@ -364,7 +364,7 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
       - script: npm ci
         displayName: 'Install dependencies'
-      - script: npx vitest --run
+      - script: vitest --run
         displayName: 'Run TestDriver tests'
         env:
           TD_API_KEY: $(TD_API_KEY)
@@ -398,7 +398,7 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
       - script: npm ci
         displayName: 'Install dependencies'
-      - script: npx vitest --run --shard=$(SHARD)
+      - script: vitest --run --shard=$(SHARD)
         displayName: 'Run TestDriver tests'
         env:
           TD_API_KEY: $(TD_API_KEY)
@@ -428,7 +428,7 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
       - script: npm ci
         displayName: 'Install dependencies'
-      - script: npx vitest --run
+      - script: vitest --run
         displayName: 'Run TestDriver tests on $(TD_OS)'
         env:
           TD_API_KEY: $(TD_API_KEY)
@@ -470,7 +470,7 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
             stage('Test') {
                 steps {
-                    sh 'npx vitest --run'
+                    sh 'vitest --run'
                 }
             }
         }
@@ -494,28 +494,28 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
                         agent { docker { image 'node:20' } }
                         steps {
                             sh 'npm ci'
-                            sh 'npx vitest --run --shard=1/4'
+                            sh 'vitest --run --shard=1/4'
                         }
                     }
                     stage('Shard 2') {
                         agent { docker { image 'node:20' } }
                         steps {
                             sh 'npm ci'
-                            sh 'npx vitest --run --shard=2/4'
+                            sh 'vitest --run --shard=2/4'
                         }
                     }
                     stage('Shard 3') {
                         agent { docker { image 'node:20' } }
                         steps {
                             sh 'npm ci'
-                            sh 'npx vitest --run --shard=3/4'
+                            sh 'vitest --run --shard=3/4'
                         }
                     }
                     stage('Shard 4') {
                         agent { docker { image 'node:20' } }
                         steps {
                             sh 'npm ci'
-                            sh 'npx vitest --run --shard=4/4'
+                            sh 'vitest --run --shard=4/4'
                         }
                     }
                 }
@@ -544,7 +544,7 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
                         }
                         steps {
                             sh 'npm ci'
-                            sh 'npx vitest --run'
+                            sh 'vitest --run'
                         }
                     }
                     stage('Windows') {
@@ -554,7 +554,7 @@ TestDriver requires an API key to authenticate with the TestDriver cloud. Store
                         }
                         steps {
                             sh 'npm ci'
-                            sh 'npx vitest --run'
+                            sh 'vitest --run'
                         }
                     }
                 }

package/ai/skills/testdriver:cloud/SKILL.md CHANGED Viewed

@@ -87,7 +87,7 @@ To prevent tests from failing due to exceeding your license slot limit, we recom
             run: npm install
           - name: Run tests
-            run: npx vitest run
+            run: vitest run
             env:
               TD_API_KEY: ${{ secrets.TD_API_KEY }}
     ```

package/ai/skills/testdriver:customizing-devices/SKILL.md CHANGED Viewed

@@ -88,10 +88,10 @@ Then pass the variable when running tests:
 ```bash
 # Run tests on Windows
-TD_OS=windows npx vitest run
+TD_OS=windows vitest run
 # Run tests on Linux (default)
-TD_OS=linux npx vitest run
+TD_OS=linux vitest run
 ```
 This pattern is useful for running the same test suite across multiple operating systems in CI/CD:
@@ -102,7 +102,7 @@ strategy:
   matrix:
     os: [linux, windows]
 steps:
-  - run: TD_OS=${{ matrix.os }} npx vitest run
+  - run: TD_OS=${{ matrix.os }} vitest run
 ```
 ## Keepalive
@@ -139,13 +139,13 @@ const testdriver = TestDriver(context, {
 Then, you can run both tests in sequence:
 ```bash
-npx vitest run -t known-good.test.mjs -t work-in-progress.test.mjs
+vitest run -t known-good.test.mjs -t work-in-progress.test.mjs
 ```
 And as you make changes to `work-in-progress.test.mjs`, you can re-run just that file to quickly iterate on the failing steps.
 ```bash
-npx vitest run work-in-progress.test.mjs
+vitest run work-in-progress.test.mjs
 ```
 <Warning>

package/ai/skills/testdriver:running-tests/SKILL.md CHANGED Viewed

@@ -13,7 +13,7 @@ TestDriver works with Vitest's powerful test runner.
 ### Run All Tests
 ```bash
-npx vitest run
+vitest run
 ```
 Executes all test files in your project once and exits. Vitest automatically discovers files matching patterns like `*.test.js`, `*.test.mjs`, or `*.spec.js`.
@@ -21,7 +21,7 @@ Executes all test files in your project once and exits. Vitest automatically dis
 ### Run with Coverage
 ```bash
-npx vitest run --coverage
+vitest run --coverage
 ```
 Generates a code coverage report showing which lines of your source code were executed during tests. Coverage helps identify untested code paths. Results are displayed in the terminal and saved to a `coverage/` directory.
@@ -33,13 +33,13 @@ Generates a code coverage report showing which lines of your source code were ex
 ### Run Specific Tests
 ```bash
-npx vitest run login.test.js
+vitest run login.test.js
 ```
 Runs only the specified test file. Useful when debugging a single test or working on a specific feature.
 ```bash
-npx vitest run login.test.js checkout.test.js
+vitest run login.test.js checkout.test.js
 ```
 Runs multiple specific test files. List as many files as needed, separated by spaces.
@@ -47,7 +47,7 @@ Runs multiple specific test files. List as many files as needed, separated by sp
 ### Filter Tests by Name
 ```bash
-npx vitest run --grep "login"
+vitest run --grep "login"
 ```
 The `--grep` flag filters tests by their name (the string passed to `it()` or `test()`). Only tests whose names match the pattern will run. Supports regex patterns for complex matching.
@@ -55,7 +55,7 @@ The `--grep` flag filters tests by their name (the string passed to `it()` or `t
 ### Run Tests in a Folder
 ```bash
-npx vitest run tests/e2e/
+vitest run tests/e2e/
 ```
 Runs all test files within a specific directory. Great for organizing tests by type (unit, integration, e2e) and running them separately.
@@ -67,7 +67,7 @@ TestDriver runs each test in its own cloud sandbox, enabling true parallel execu
 ### Control Concurrency
 ```bash
-npx vitest run --maxConcurrency=5
+vitest run --maxConcurrency=5
 ```
 The `--maxConcurrency` flag limits how many tests run simultaneously. This should match your TestDriver license slots to avoid failures from exhausted slots.
@@ -75,7 +75,7 @@ The `--maxConcurrency` flag limits how many tests run simultaneously. This shoul
 ### Thread Configuration
 ```bash
-npx vitest run --pool=threads --minThreads=2 --maxThreads=8
+vitest run --pool=threads --minThreads=2 --maxThreads=8
 ```
 Fine-tune thread allocation for optimal performance:
@@ -139,7 +139,7 @@ export default defineConfig({
 Use Vitest UI for interactive debugging:
 ```bash
-npx vitest --ui
+vitest --ui
 ```
 The `--ui` flag launches a web-based interface for managing your test suite. Unlike `vitest run`, this starts in watch mode by default.
@@ -152,7 +152,7 @@ Open http://localhost:51204 to see:
 - **Filter and search** — Quickly find tests by name or status
 <Tip>
-  Combine with `--open` to automatically open the UI in your browser: `npx vitest --ui --open`
+  Combine with `--open` to automatically open the UI in your browser: `vitest --ui --open`
 </Tip>
@@ -167,7 +167,7 @@ Reports include:
 - **Error details** - Debug failures with full context
 ```bash
-$ npx vitest run
+$ vitest run
  ✓ login.test.js (2) 18.4s
    ✓ user can login 12.3s

package/ai/skills/testdriver:secrets/SKILL.md CHANGED Viewed

@@ -57,7 +57,7 @@ Store sensitive credentials as GitHub repository secrets so they're never expose
         TD_API_KEY: ${{ secrets.TD_API_KEY }}
         TD_USERNAME: ${{ secrets.TD_USERNAME }}
         TD_PASSWORD: ${{ secrets.TD_PASSWORD }}
-      run: npx vitest run
+      run: vitest run
     ```
   </Step>
 </Steps>

package/ai/skills/testdriver:testdriver/SKILL.md CHANGED Viewed

@@ -34,7 +34,7 @@ Use this agent when the user asks to:
 4. **⚠️ WRITE CODE IMMEDIATELY**: After EVERY successful action, append the generated code to the test file RIGHT AWAY. Do NOT wait until the end.
 5. **Verify Actions**: Use `check` after actions to verify they succeeded (for YOUR understanding only).
 6. **Add Assertions**: Use `assert` for test conditions that should be in the final test file.
-7. **⚠️ RUN THE TEST YOURSELF**: Use `npx vitest run <testFile> --reporter=dot` to run the test - do NOT tell the user to run it. Iterate until it passes.
+7. **⚠️ RUN THE TEST YOURSELF**: Use `vitest run <testFile> --reporter=dot` to run the test - do NOT tell the user to run it. Iterate until it passes.
 ## Prerequisites
@@ -224,7 +224,7 @@ await testdriver.screenshot(1, false, true);
 **Every MCP tool response includes "ACTION REQUIRED: Append this code..." - you MUST write that code to the test file IMMEDIATELY before proceeding to the next action.**
-**When ready to validate, RUN THE TEST YOURSELF using `npx vitest run`. Do NOT tell the user to run it.**
+**When ready to validate, RUN THE TEST YOURSELF using `vitest run`. Do NOT tell the user to run it.**
 ### Step 1: Start a Session
@@ -284,7 +284,7 @@ assert({ assertion: "the dashboard is visible" })
 **⚠️ YOU must run the test - do NOT tell the user to run it:**
 ```bash
-npx vitest run tests/login.test.mjs --reporter=dot
+vitest run tests/login.test.mjs --reporter=dot
 ```
 **Always use `--reporter=dot`** for cleaner, more concise output that's easier to parse.
@@ -356,7 +356,7 @@ view_local_screenshot({ path: ".testdriver/screenshots/checkout.test/before-asse
 ### Tips for MCP Workflow
 1. **⚠️ Write code IMMEDIATELY** - After EVERY action, append generated code to test file RIGHT AWAY
-2. **⚠️ Run tests YOURSELF** - Use `npx vitest run` - do NOT tell user to run tests
+2. **⚠️ Run tests YOURSELF** - Use `vitest run` - do NOT tell user to run tests
 3. **⚠️ Add screenshots liberally** - Include `await testdriver.screenshot()` after every significant action for debugging
 4. **⚠️ Use screenshot viewing for debugging** - When tests fail, use `list_local_screenshots` and `view_local_screenshot` to understand what went wrong
 5. **Work incrementally** - Don't try to build the entire test at once
@@ -500,7 +500,7 @@ const result = await testdriver.assert("dashboard is visible");
 ## Tips for Agents
 1. **⚠️ WRITE CODE IMMEDIATELY** - After EVERY successful MCP action, append the generated code to the test file RIGHT AWAY. Do NOT wait until the session ends.
-2. **⚠️ RUN TESTS YOURSELF** - Do NOT tell the user to run tests. YOU must run the tests using `npx vitest run <testFile> --reporter=dot`. Always use `--reporter=dot` for cleaner output. Analyze the output and iterate until the test passes. **Always share the test report link** (e.g., `https://app.testdriver.ai/projects/.../reports/...`) with the user after each run.
+2. **⚠️ RUN TESTS YOURSELF** - Do NOT tell the user to run tests. YOU must run the tests using `vitest run <testFile> --reporter=dot`. Always use `--reporter=dot` for cleaner output. Analyze the output and iterate until the test passes. **Always share the test report link** (e.g., `https://app.testdriver.ai/projects/.../reports/...`) with the user after each run.
 3. **⚠️ ADD SCREENSHOTS LIBERALLY** - Include `await testdriver.screenshot()` throughout your tests: after provision, before/after clicks, after typing, and before assertions. This creates a visual trail that makes debugging failures much easier.
 4. **⚠️ USE SCREENSHOT VIEWING FOR DEBUGGING** - When tests fail, use `list_local_screenshots` and `view_local_screenshot` MCP commands to see exactly what the UI looked like. This is often faster than re-running the test.
 5. **⚠️ NEVER USE `.wait()`** - Do NOT use any `.wait()` method. Instead, use `find()` with a `timeout` option to poll for elements, or use `assert()` / `check()` to verify state. Explicit waits are flaky and slow.

package/ai/skills/testdriver:variables/SKILL.md CHANGED Viewed

@@ -32,9 +32,9 @@ test('multi-environment testing', async (context) => {
 ```bash
 # Run against different environments
-TEST_ENV=dev npx vitest run
-TEST_ENV=staging npx vitest run
-TEST_ENV=production npx vitest run
+TEST_ENV=dev vitest run
+TEST_ENV=staging vitest run
+TEST_ENV=production vitest run
 ```
 ## Test Fixtures

package/debugger/index.html CHANGED Viewed

@@ -309,45 +309,9 @@
         user-select: none;
       }
-      .close-button {
-        position: fixed;
-        top: 12px;
-        right: 12px;
-        z-index: 100;
-        background: rgba(0, 0, 0, 0.8);
-        border: 1px solid #444;
-        color: #fff;
-        padding: 8px 16px;
-        border-radius: 6px;
-        cursor: pointer;
-        font-size: 13px;
-        font-weight: 500;
-        pointer-events: auto;
-        transition: all 0.2s ease;
-        display: flex;
-        align-items: center;
-        gap: 6px;
-      }
-      .close-button:hover {
-        background: rgba(220, 53, 69, 0.9);
-        border-color: #dc3545;
-      }
-      .close-button svg {
-        width: 14px;
-        height: 14px;
-        fill: currentColor;
-      }
     </style>
   </head>
   <body>
-    <!-- Close window button -->
-    <button class="close-button" onclick="window.close()" title="Close this window">
-      <svg viewBox="0 0 24 24"><path d="M19 6.41L17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
-      Close
-    </button>
     <!-- Loading screen -->
     <div class="loading-screen" id="loading-screen">
       <div class="testdriver-logo">

package/lib/vitest/hooks.mjs CHANGED Viewed

@@ -191,6 +191,7 @@ const lifecycleHandlers = new WeakMap();
  * });
  */
 export function TestDriver(context, options = {}) {
+  console.log("[DEBUG hooks entry] options:", JSON.stringify(options));
   if (!context || !context.task) {
     throw new Error(
       'TestDriver() requires Vitest context. Pass the context parameter from your test function: test("name", async (context) => { ... })',
@@ -246,6 +247,8 @@ export function TestDriver(context, options = {}) {
     config.apiRoot = process.env.TD_API_ROOT;
   }
+  console.log("[DEBUG hooks] options.preview:", options.preview, "config.preview:", config.preview);
   const testdriver = new TestDriverSDK(apiKey, config);
   testdriver.__vitestContext = context.task;
   testdriver._debugOnFailure = mergedOptions.debugOnFailure || false;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "testdriverai",
-  "version": "7.2.92",
+  "version": "7.3.2",
   "description": "Next generation autonomous AI agent for end-to-end testing of web & desktop",
   "main": "sdk.js",
   "types": "sdk.d.ts",

package/sdk-log-formatter.js CHANGED Viewed

@@ -477,9 +477,10 @@ class SDKLogFormatter {
    * @param {boolean} passed - Whether assertion passed
    * @param {string} response - The AI response message
    * @param {number} durationMs - Duration in milliseconds
+   * @param {boolean} cacheHit - Whether the result was from cache
    * @returns {string} Formatted result line
    */
-  formatAssertResult(passed, response, durationMs) {
+  formatAssertResult(passed, response, durationMs, cacheHit = false) {
     const parts = [];
     this.addTimestamp(parts);
     parts.push(this.getResultPrefix());
@@ -490,6 +491,12 @@ class SDKLogFormatter {
       parts.push(chalk.red("failed"));
     }
+    // Add cache hit indicator (like find does)
+    if (cacheHit) {
+      parts.push(chalk.dim("·"));
+      parts.push(chalk.bold.yellow("⚡ cached"));
+    }
     // Add the response message (trimmed)
     if (response) {
       const trimmedResponse = response.trim().split('\n')[0]; // First line only

package/sdk.d.ts CHANGED Viewed

@@ -364,6 +364,42 @@ export interface HoverResult {
   [key: string]: any;
 }
+/** Bounding box for an OCR word */
+export interface OCRBoundingBox {
+  /** Left edge X coordinate */
+  x0: number;
+  /** Top edge Y coordinate */
+  y0: number;
+  /** Right edge X coordinate */
+  x1: number;
+  /** Bottom edge Y coordinate */
+  y1: number;
+}
+/** Individual word extracted by OCR */
+export interface OCRWord {
+  /** The text content of the word */
+  content: string;
+  /** Confidence score for this word (0-100) */
+  confidence: number;
+  /** Bounding box coordinates */
+  bbox: OCRBoundingBox;
+}
+/** Result from OCR text extraction */
+export interface OCRResult {
+  /** Array of extracted words with positions */
+  words: OCRWord[];
+  /** All text concatenated with spaces */
+  fullText: string;
+  /** Overall OCR confidence (0-100) */
+  confidence: number;
+  /** Width of the analyzed screenshot */
+  imageWidth: number;
+  /** Height of the analyzed screenshot */
+  imageHeight: number;
+}
 // ====================================
 // Command Options Interfaces
 // ====================================
@@ -520,6 +556,14 @@ export interface ExtractOptions {
 export interface AssertOptions {
   /** Assertion to check */
   assertion: string;
+  /** Cache threshold (0-1). Lower values require closer matches. Set to -1 to disable cache. */
+  threshold?: number;
+  /** Cache key for grouping cached assertions (enables caching when provided) */
+  cacheKey?: string;
+  /** Operating system identifier for cache partitioning */
+  os?: string;
+  /** Screen resolution for cache partitioning */
+  resolution?: string;
 }
 /** Options for exec command */
@@ -1209,9 +1253,21 @@ export default class TestDriverSDK {
   /**
    * Make an AI-powered assertion
    * @param assertion - Assertion to check
-   * @param options - Additional options (reserved for future use)
+   * @param options - Cache options for the assertion
+   *
+   * @example
+   * // Simple assertion
+   * await client.assert('the login form is visible');
+   *
+   * @example
+   * // With caching enabled via cacheKey
+   * await client.assert('the submit button is enabled', { cacheKey: 'my-test-run' });
+   *
+   * @example
+   * // With custom threshold
+   * await client.assert('the page loaded', { threshold: 0.01, cacheKey: 'login-test' });
    */
-  assert(assertion: string, options?: object): Promise<boolean>;
+  assert(assertion: string, options?: { threshold?: number; cacheKey?: string; os?: string; resolution?: string }): Promise<boolean>;
   /**
    * Extract information from the screen using AI
@@ -1269,6 +1325,34 @@ export default class TestDriverSDK {
    */
   screenshot(filename?: string): Promise<string>;
+  /**
+   * Extract all visible text from the current screen using OCR (Tesseract)
+   * Returns structured data with text content, bounding boxes, and confidence scores
+   *
+   * @returns OCR extraction result with words, positions, and confidence
+   *
+   * @example
+   * // Get all text on screen
+   * const result = await testdriver.ocr();
+   * console.log(result.fullText);
+   *
+   * @example
+   * // Find and click text
+   * const result = await testdriver.ocr();
+   * const submit = result.words.find(w => w.content === 'Submit');
+   * if (submit) {
+   *   const x = (submit.bbox.x0 + submit.bbox.x1) / 2;
+   *   const y = (submit.bbox.y0 + submit.bbox.y1) / 2;
+   *   await testdriver.click({ x, y });
+   * }
+   *
+   * @example
+   * // Check if text exists
+   * const result = await testdriver.ocr();
+   * const hasError = result.words.some(w => w.content.toLowerCase().includes('error'));
+   */
+  ocr(): Promise<OCRResult>;
   /**
    * Wait for specified time
    * @deprecated Consider using element polling with find() instead of arbitrary waits