npm - testdriverai - Versions diffs - 7.3.12 → 7.3.13 - Mend

testdriverai 7.3.12 → 7.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (133)

package/.github/skills/testdriver:ai/SKILL.md +204 -0
package/.github/skills/testdriver:assert/SKILL.md +284 -0
package/.github/skills/testdriver:aws-setup/SKILL.md +515 -0
package/.github/skills/testdriver:caching/SKILL.md +124 -0
package/.github/skills/testdriver:captcha/SKILL.md +159 -0
package/.github/skills/testdriver:ci-cd/SKILL.md +602 -0
package/.github/skills/testdriver:click/SKILL.md +286 -0
package/.github/skills/testdriver:client/SKILL.md +339 -0
package/.github/skills/testdriver:cloud/SKILL.md +119 -0
package/.github/skills/testdriver:customizing-devices/SKILL.md +153 -0
package/.github/skills/testdriver:dashcam/SKILL.md +418 -0
package/.github/skills/testdriver:debugging-with-screenshots/SKILL.md +271 -0
package/.github/skills/testdriver:device-config/SKILL.md +317 -0
package/.github/skills/testdriver:double-click/SKILL.md +102 -0
package/.github/skills/testdriver:elements/SKILL.md +605 -0
package/.github/skills/testdriver:enterprise/SKILL.md +114 -0
package/.github/skills/testdriver:examples/SKILL.md +7 -0
package/.github/skills/testdriver:exec/SKILL.md +345 -0
package/.github/skills/testdriver:find/SKILL.md +721 -0
package/.github/skills/testdriver:focus-application/SKILL.md +293 -0
package/.github/skills/testdriver:generating-tests/SKILL.md +36 -0
package/.github/skills/testdriver:hover/SKILL.md +278 -0
package/.github/skills/testdriver:locating-elements/SKILL.md +71 -0
package/.github/skills/testdriver:making-assertions/SKILL.md +32 -0
package/.github/skills/testdriver:mcp-workflow/SKILL.md +410 -0
package/.github/skills/testdriver:mouse-down/SKILL.md +161 -0
package/.github/skills/testdriver:mouse-up/SKILL.md +164 -0
package/.github/skills/testdriver:performing-actions/SKILL.md +51 -0
package/.github/skills/testdriver:press-keys/SKILL.md +348 -0
package/.github/skills/testdriver:quickstart/SKILL.md +161 -0
package/.github/skills/testdriver:reusable-code/SKILL.md +240 -0
package/.github/skills/testdriver:right-click/SKILL.md +123 -0
package/.github/skills/testdriver:running-tests/SKILL.md +181 -0
package/.github/skills/testdriver:screenshot/SKILL.md +167 -0
package/.github/skills/testdriver:scroll/SKILL.md +299 -0
package/.github/skills/testdriver:secrets/SKILL.md +115 -0
package/.github/skills/testdriver:self-hosted/SKILL.md +65 -0
package/.github/skills/testdriver:test-writer/SKILL.md +451 -0
package/.github/skills/testdriver:testdriver/SKILL.md +523 -0
package/.github/skills/testdriver:testdriver-mechanic/SKILL.md +165 -0
package/.github/skills/testdriver:type/SKILL.md +357 -0
package/.github/skills/testdriver:variables/SKILL.md +111 -0
package/.github/skills/testdriver:waiting-for-elements/SKILL.md +66 -0
package/.github/skills/testdriver:what-is-testdriver/SKILL.md +54 -0
package/.github/workflows/acceptance-windows-scheduled.yaml +6 -1
package/.github/workflows/acceptance.yaml +0 -36
package/.github/workflows/update-examples.yaml +53 -0
package/CHANGELOG.md +4 -0
package/agent/events.js +1 -0
package/agent/index.js +8 -0
package/agent/lib/commands.js +48 -29
package/agent/lib/redraw.js +3 -1
package/agent/lib/sandbox.js +166 -14
package/agent/lib/sdk.js +142 -3
package/agent/lib/system.js +4 -6
package/ai/skills/testdriver:ai/SKILL.md +204 -0
package/ai/skills/testdriver:assert/SKILL.md +315 -0
package/ai/skills/testdriver:aws-setup/SKILL.md +448 -0
package/ai/skills/testdriver:caching/SKILL.md +124 -0
package/ai/skills/testdriver:captcha/SKILL.md +159 -0
package/ai/skills/testdriver:ci-cd/SKILL.md +602 -0
package/ai/skills/testdriver:click/SKILL.md +286 -0
package/ai/skills/testdriver:client/SKILL.md +372 -0
package/ai/skills/testdriver:cloud/SKILL.md +119 -0
package/ai/skills/testdriver:customizing-devices/SKILL.md +153 -0
package/ai/skills/testdriver:dashcam/SKILL.md +418 -0
package/ai/skills/testdriver:debugging-with-screenshots/SKILL.md +401 -0
package/ai/skills/testdriver:device-config/SKILL.md +317 -0
package/ai/skills/testdriver:double-click/SKILL.md +102 -0
package/ai/skills/testdriver:elements/SKILL.md +605 -0
package/ai/skills/testdriver:enterprise/SKILL.md +114 -0
package/ai/skills/testdriver:examples/SKILL.md +7 -0
package/ai/skills/testdriver:exec/SKILL.md +345 -0
package/ai/skills/testdriver:find/SKILL.md +745 -0
package/ai/skills/testdriver:focus-application/SKILL.md +293 -0
package/ai/skills/testdriver:generating-tests/SKILL.md +36 -0
package/ai/skills/testdriver:hover/SKILL.md +278 -0
package/ai/skills/testdriver:locating-elements/SKILL.md +71 -0
package/ai/skills/testdriver:making-assertions/SKILL.md +32 -0
package/ai/skills/testdriver:mcp-workflow/SKILL.md +410 -0
package/ai/skills/testdriver:mouse-down/SKILL.md +161 -0
package/ai/skills/testdriver:mouse-up/SKILL.md +164 -0
package/ai/skills/testdriver:ocr/SKILL.md +235 -0
package/ai/skills/testdriver:performing-actions/SKILL.md +51 -0
package/ai/skills/testdriver:press-keys/SKILL.md +348 -0
package/ai/skills/testdriver:quickstart/SKILL.md +146 -0
package/ai/skills/testdriver:reusable-code/SKILL.md +240 -0
package/ai/skills/testdriver:right-click/SKILL.md +123 -0
package/ai/skills/testdriver:running-tests/SKILL.md +185 -0
package/ai/skills/testdriver:screenshot/SKILL.md +248 -0
package/ai/skills/testdriver:scroll/SKILL.md +335 -0
package/ai/skills/testdriver:secrets/SKILL.md +115 -0
package/ai/skills/testdriver:self-hosted/SKILL.md +65 -0
package/ai/skills/testdriver:test-writer/SKILL.md +451 -0
package/ai/skills/testdriver:testdriver/SKILL.md +631 -0
package/ai/skills/testdriver:testdriver-mechanic/SKILL.md +165 -0
package/ai/skills/testdriver:type/SKILL.md +357 -0
package/ai/skills/testdriver:variables/SKILL.md +111 -0
package/ai/skills/testdriver:waiting-for-elements/SKILL.md +66 -0
package/ai/skills/testdriver:what-is-testdriver/SKILL.md +54 -0
package/debugger/index.html +12 -2
package/docs/v7/examples/scroll-keyboard.mdx +1 -1
package/docs/v7/find.mdx +1 -0
package/examples/config.mjs +1 -1
package/examples/findall-coffee-icons.test.mjs +42 -0
package/examples/flake-diffthreshold-001.test.mjs +9 -0
package/examples/flake-diffthreshold-01.test.mjs +9 -0
package/examples/flake-diffthreshold-05.test.mjs +9 -0
package/examples/{z_flake-noredraw-cache.test.mjs → flake-noredraw-cache.test.mjs} +2 -2
package/examples/{z_flake-noredraw-nocache.test.mjs → flake-noredraw-nocache.test.mjs} +2 -2
package/examples/{z_flake-redraw-cache.test.mjs → flake-redraw-cache.test.mjs} +2 -2
package/examples/{z_flake-redraw-nocache.test.mjs → flake-redraw-nocache.test.mjs} +2 -2
package/examples/flake-rocket-match.test.mjs +30 -0
package/examples/{z_flake-shared.mjs → flake-shared.mjs} +2 -2
package/examples/parse.test.mjs +19 -0
package/examples/scroll-keyboard.test.mjs +1 -1
package/interfaces/cli/lib/base.js +6 -0
package/interfaces/logger.js +51 -13
package/interfaces/vitest-plugin.mjs +137 -0
package/lib/core/index.d.ts +22 -0
package/lib/init-project.js +105 -6
package/lib/vitest/hooks.mjs +2 -5
package/lib/vitest/setup-disable-defender.mjs +52 -0
package/package.json +2 -1
package/sdk-log-formatter.js +90 -0
package/sdk.d.ts +88 -51
package/sdk.js +126 -18
package/setup/aws/disable-defender.sh +42 -0
package/vitest.config.mjs +1 -3
package/examples/z_flake-diffthreshold-001.test.mjs +0 -9
package/examples/z_flake-diffthreshold-01.test.mjs +0 -9
package/examples/z_flake-diffthreshold-05.test.mjs +0 -9
package/{examples → manual}/captcha-api.test.mjs +0 -0

package/ai/skills/testdriver:what-is-testdriver/SKILL.md ADDED Viewed

@@ -0,0 +1,54 @@
+---
+name: testdriver:what-is-testdriver
+description: Reliably test your most difficult user flows
+---
+<!-- Generated from what-is-testdriver.mdx. DO NOT EDIT. -->
+## The problem with modern testing tools
+Modern testing tools like Playwright are designed to test a single web application, running in a single browser tab using selectors.
+However, selectors are often either unreliable or unavailable in complex scenarios, leading to brittle and flaky tests:
+| Challenge | Problem | Examples |
+|-----------|---------|----------|
+| **Fast moving teams** | Frequently change UI structure, breaking CSS/XPath selectors | Agile teams, startups, vibe-coders |
+| **Dynamic content** | Cannot be targeted with selectors | AI chatbots, PDFs, images, videos |
+| **Software you don't own** | May lack proper accessibility attributes | Other websites, extensions, third-party applications |
+| **Multi-application workflows** | Cannot be tested with web-only tools | Desktop apps, browser extensions, IDEs |
+| **Visual states** | Impossible to verify with code-based selectors | Charts, graphs, videos, images, spelling errors, UI layout |
+## The TestDriver Solution
+TestDriver is a complete testing platform built specifically for handling these scenarios. It consists of a Javascript SDK, hosted infrastructure, and debugging tools that make it easy to write, run, and maintain tests for your most difficult user flows.
+### Javascript SDK
+Here is an example of a TestDriver test that installs a production Chrome extension from the Chrome Web Store and verifies that it appears in the extensions menu:
+```javascript Installing Loom from the Chrome Web Store
+import { describe, expect, it } from "vitest";
+import { TestDriver } from "testdriverai/vitest/hooks";
+describe("Chrome Extension Test", () => {
+  const testdriver = TestDriver(context);
+  // Launch Chrome with Loom loaded by its Chrome Web Store ID
+  await testdriver.provision.chromeExtension({
+    extensionId: 'liecbddmkiiihnedobmlmillhodjkdmb'
+  });
+  // Click on the extensions button (puzzle piece icon) in Chrome toolbar
+  const extensionsButton = await testdriver.find("The puzzle-shaped icon in the Chrome toolbar.");
+  await extensionsButton.click();
+  // Look for Loom in the extensions menu
+  const loomExtension = await testdriver.find("Loom extension in the extensions dropdown");
+  expect(loomExtension.found()).toBeTruthy();
+});
+```
+<Tip>[vitest](https://vitest.dev/) is the preferred test runner for TestDriver.</Tip>
+,

package/debugger/index.html CHANGED Viewed

@@ -178,7 +178,7 @@
       #vm-iframe {
         display: none;
         border: none;
-        pointer-events: auto;
+        pointer-events: none; /* view-only by default, enabled on user click */
         position: absolute;
         top: 0px;
         left: 0px;
@@ -188,6 +188,10 @@
         overflow: hidden;
       }
+      #vm-iframe.interactive {
+        pointer-events: auto;
+      }
       /* Loading screen styles */
       .loading-screen {
         position: absolute;
@@ -564,6 +568,7 @@
       const hideInteractionOverlay = () => {
         interactionOverlay.classList.remove("visible");
         isInteractionEnabled = true;
+        iframe.classList.add("interactive"); // Enable pointer events on iframe
         // Clear any existing timeout
         if (interactionTimeout) {
@@ -571,6 +576,11 @@
         }
       };
+      const disableInteraction = () => {
+        isInteractionEnabled = false;
+        iframe.classList.remove("interactive"); // Disable pointer events on iframe
+      };
       // Event listeners for interaction overlay
       overlay.addEventListener("mouseenter", showInteractionOverlay);
       overlay.addEventListener("mouseleave", () => {
@@ -754,7 +764,7 @@
       // Handle window blur/focus for screen locking
       window.addEventListener("blur", () => {
         showInteractionOverlay();
-        isInteractionEnabled = false;
+        disableInteraction();
       });
     </script>
   </body>

package/docs/v7/examples/scroll-keyboard.mdx CHANGED Viewed

@@ -38,7 +38,7 @@ describe("Scroll Keyboard Test", () => {
     // Navigate to https://www.webhamster.com/
     await testdriver.focusApplication("Google Chrome");
     const urlBar = await testdriver.find(
-      "testdriver-sandbox.vercel.app/login, the URL in the omnibox showing the current page",
+      "testdriver-sandbox.vercel.app/login, the URL in the omnibox showing the current page", {zoom: true}
     );
     await urlBar.click();
     await testdriver.pressKeys(["ctrl", "a"]);

package/docs/v7/find.mdx CHANGED Viewed

@@ -274,6 +274,7 @@ This two-phase approach gives the AI a higher-resolution view of the target area
   - Selecting from a grid of similar items
   - Targeting elements in dense UI areas
   - The default locate is clicking the wrong similar element
+  - You get an AI verification rejection like "The crosshair is located in the empty space of the browser's tab bar/title bar area" — this means the initial locate was imprecise and zoom will help the AI pinpoint the correct element
 </Tip>
 ```javascript

package/examples/config.mjs CHANGED Viewed

@@ -1,5 +1,5 @@
 export const getDefaults = (context) => ({
   ip: context.ip || process.env.TD_IP,
-  redraw: false,
+  redraw: { enabled: false },
   preview: 'ide',
 });

package/examples/findall-coffee-icons.test.mjs ADDED Viewed

@@ -0,0 +1,42 @@
+/**
+ * TestDriver SDK - FindAll Coffee Icons Test
+ * Loads a random icon grid and uses findAll() to locate and click all 4 coffee cup icons
+ */
+import { describe, expect, it } from "vitest";
+import { TestDriver } from "../lib/vitest/hooks.mjs";
+import { getDefaults } from "./config.mjs";
+describe("FindAll Coffee Icons", () => {
+  it("should find and click all 4 coffee cup icons", async (context) => {
+    const testdriver = TestDriver(context, {
+      ...getDefaults(context),
+      headless: true,
+    });
+    await testdriver.provision.chrome({
+      url: "https://v0-random-icon-grid.vercel.app/",
+    });
+    // Use findAll to locate all coffee cup icons on the page
+    const coffeeIcons = await testdriver.findAll("coffee cup icon, there are exactly 4 on the page");
+    // Log each icon's coordinates
+    console.log(`Found ${coffeeIcons.length} coffee icons:`);
+    coffeeIcons.forEach((icon, i) => {
+      console.log(`  Icon ${i + 1}: (${icon.x}, ${icon.y}) center=(${icon.centerX}, ${icon.centerY})`);
+    });
+    // Verify we found 3 or 4 coffee icons
+    expect(coffeeIcons.length).toBeGreaterThanOrEqual(3);
+    expect(coffeeIcons.length).toBeLessThanOrEqual(4);
+    // Click each coffee cup icon
+    for (const icon of coffeeIcons) {
+      await icon.click();
+    }
+    // Verify the selection count is displayed
+    await testdriver.assert("the page says 'Selected: 3 / 4' or 'Matched 4 of a kind!'");
+  });
+});

package/examples/flake-diffthreshold-001.test.mjs ADDED Viewed

@@ -0,0 +1,9 @@
+/**
+ * Popup Loading - diffThreshold=0.01, cache=true
+ */
+import { popupLoadingTest } from "./flake-shared.mjs";
+popupLoadingTest("screen=0.01, cache=true", {
+  redraw: { enabled: true, thresholds: { screen: 0.01 } },
+  cache: true,
+});

package/examples/flake-diffthreshold-01.test.mjs ADDED Viewed

@@ -0,0 +1,9 @@
+/**
+ * Popup Loading - diffThreshold=0.1, cache=false
+ */
+import { popupLoadingTest } from "./flake-shared.mjs";
+popupLoadingTest("screen=0.1, cache=false", {
+  redraw: { enabled: true, thresholds: { screen: 0.1 } },
+  cache: false,
+});

package/examples/flake-diffthreshold-05.test.mjs ADDED Viewed

@@ -0,0 +1,9 @@
+/**
+ * Popup Loading - diffThreshold=0.5, cache=true
+ */
+import { popupLoadingTest } from "./flake-shared.mjs";
+popupLoadingTest("screen=0.5, cache=true", {
+  redraw: { enabled: true, thresholds: { screen: 0.5 } },
+  cache: true,
+});

package/examples/{z_flake-noredraw-cache.test.mjs → flake-noredraw-cache.test.mjs} RENAMED Viewed

@@ -1,9 +1,9 @@
 /**
  * Popup Loading - redraw=false, cache=true
  */
-import { popupLoadingTest } from "./z_flake-shared.mjs";
+import { popupLoadingTest } from "./flake-shared.mjs";
 popupLoadingTest("redraw=false, cache=true", {
-  redraw: false,
+  redraw: { enabled: false },
   cache: true,
 });

package/examples/{z_flake-noredraw-nocache.test.mjs → flake-noredraw-nocache.test.mjs} RENAMED Viewed

@@ -1,9 +1,9 @@
 /**
  * Popup Loading - redraw=false, cache=false
  */
-import { popupLoadingTest } from "./z_flake-shared.mjs";
+import { popupLoadingTest } from "./flake-shared.mjs";
 popupLoadingTest("redraw=false, cache=false", {
-  redraw: false,
+  redraw: { enabled: false },
   cache: false,
 });

package/examples/{z_flake-redraw-cache.test.mjs → flake-redraw-cache.test.mjs} RENAMED Viewed

@@ -1,9 +1,9 @@
 /**
  * Popup Loading - redraw=true, cache=true
  */
-import { popupLoadingTest } from "./z_flake-shared.mjs";
+import { popupLoadingTest } from "./flake-shared.mjs";
 popupLoadingTest("redraw=true, cache=true", {
-  redraw: true,
+  redraw: { enabled: true },
   cache: true,
 });

package/examples/{z_flake-redraw-nocache.test.mjs → flake-redraw-nocache.test.mjs} RENAMED Viewed

@@ -1,9 +1,9 @@
 /**
  * Popup Loading - redraw=true, cache=false
  */
-import { popupLoadingTest } from "./z_flake-shared.mjs";
+import { popupLoadingTest } from "./flake-shared.mjs";
 popupLoadingTest("redraw=true, cache=false", {
-  redraw: true,
+  redraw: { enabled: true },
   cache: false,
 });

package/examples/flake-rocket-match.test.mjs ADDED Viewed

@@ -0,0 +1,30 @@
+/**
+ * Popup Loading - Skip straight to the rocket match (skipToIcons=true)
+ */
+import { describe, expect, it } from "vitest";
+import { TestDriver } from "../lib/vitest/hooks.mjs";
+describe("Rocket Match (skipToIcons)", () => {
+  it("should find the rocket in the icon grid", async (context) => {
+    const testdriver = TestDriver(context, {
+      preview: "ide",
+      ip: context.ip || process.env.TD_IP,
+    });
+    await testdriver.provision.chrome({
+      url: "https://v0-popup-with-loading-bar.vercel.app/?skipToIcons=true",
+    });
+    // Wait for the 5x5 grid of images to fully load and click the rocket
+    await testdriver.find("The icon of a rocket in the 5x5 grid of images", {
+      timeout: 60000,
+      zoom: 1,
+    }).click();
+    // Assert the success message appears
+    const rocketResult = await testdriver.assert(
+      "The text 'You found the rocket!' is visible on the page"
+    );
+    expect(rocketResult).toBeTruthy();
+  });
+});

package/examples/{z_flake-shared.mjs → flake-shared.mjs} RENAMED Viewed

@@ -20,7 +20,7 @@ export function popupLoadingTest(label, options = {}) {
       await testdriver.screenshot();
       // Accept the cookie banner to trigger the loading process
-      let acceptButton = await testdriver.find("Accept All button on the cookie banner", {timeout: 60000});
+      let acceptButton = await testdriver.find("Accept All button on the cookie banner", {timeout: 10000});
       if (await acceptButton.found()) {
         await acceptButton.click();
@@ -41,7 +41,7 @@ export function popupLoadingTest(label, options = {}) {
       await testdriver.find("Continue button in the modal").click();
       // Wait for the 5x5 grid of images to fully load (up to 60s) and click the rocket
-      await testdriver.find("rocket image in the 5x5 grid", { timeout: 60000, cacheThreshold: -1 }).click();
+      await testdriver.find("The icon of a rocket in the 5x5 grid of images", { timeout: 60000, zoom: true }).click();
       // Assert the success message appears
       const rocketResult = await testdriver.assert("The text 'You found the rocket!' is visible on the page");

package/examples/parse.test.mjs ADDED Viewed

@@ -0,0 +1,19 @@
+/**
+ * TestDriver SDK - Parse Test (Vitest)
+ * Opens Airbnb and runs the .parse() SDK command to analyze the screen.
+ */
+import { describe, it } from "vitest";
+import { TestDriver } from "../lib/vitest/hooks.mjs";
+import { getDefaults } from "./config.mjs";
+describe("Parse Test", () => {
+  it("should open Airbnb and parse the screen", async (context) => {
+    const testdriver = TestDriver(context, { ...getDefaults(context) });
+    await testdriver.provision.chrome({ url: "https://www.airbnb.com" });
+    const result = await testdriver.parse();
+    console.log(`Found ${result.elements?.length || 0} elements`);
+    console.log(JSON.stringify(result, null, 2));
+  });
+});

package/examples/scroll-keyboard.test.mjs CHANGED Viewed

@@ -16,7 +16,7 @@ describe("Scroll Keyboard Test", () => {
     // Navigate to https://www.webhamster.com/
     await testdriver.focusApplication("Google Chrome");
     const urlBar = await testdriver.find(
-      "testdriver-sandbox.vercel.app/login, the URL in the omnibox showing the current page",
+      "testdriver-sandbox.vercel.app/login, the URL in the omnibox showing the current page", {zoom: true}
     );
     await urlBar.click();
     await testdriver.pressKeys(["ctrl", "a"]);

package/interfaces/cli/lib/base.js CHANGED Viewed

@@ -48,6 +48,8 @@ class BaseCommand extends Command {
     this.agent.sandbox.send({
       type: "output",
       output: Buffer.from(message).toString("base64"),
+    }).catch(() => {
+      // Silently ignore output send failures to prevent infinite loops
     });
   }
@@ -107,6 +109,10 @@ class BaseCommand extends Command {
       });
       this.agent.emitter.on("error:*", (message) => {
+        // Don't forward sandbox errors back to sandbox - this creates an infinite loop
+        // (sandbox error → error:* event → sendToSandbox → output message → sandbox error → ...)
+        const event = this.agent.emitter.event;
+        if (event === "error:sandbox") return;
         this.sendToSandbox(message);
       });
     });

package/interfaces/logger.js CHANGED Viewed

@@ -11,31 +11,69 @@ class CustomTransport extends Transport {
     this.level = opts.level || "info";
     this.logStore = opts.logStore || []; // You could connect to a DB or API here
     this.sandbox = null;
+    // Batching configuration to reduce websocket traffic
+    this.batchQueue = [];
+    this.batchTimeout = null;
+    this.BATCH_INTERVAL_MS = 100;  // Flush every 100ms
+    this.MAX_BATCH_SIZE = 20;      // Or when batch reaches 20 messages
   }
-  log(info, callback) {
+  _flushBatch() {
+    if (this.batchQueue.length === 0) return;
+    // Capture and clear the batch atomically to prevent duplicate sends
+    const batch = this.batchQueue;
+    this.batchQueue = [];
+    this.batchTimeout = null;
     try {
-      const { message } = info;
       if (!this.sandbox) {
         this.sandbox = require("../agent/lib/sandbox");
       }
       if (this.sandbox && this.sandbox.instanceSocketConnected) {
-        if (typeof message === "object") {
-          console.log(chalk.cyan("protecting against base64 error"));
-          console.log(message);
-          return;
-        }
+        // Send all batched messages as a single combined output
+        const combinedOutput = batch.join('\n');
         this.sandbox.send({
           type: "output",
-          output: Buffer.from(message).toString("base64"),
+          output: Buffer.from(combinedOutput).toString("base64"),
+        }).catch((e) => {
+          // Re-queue failed messages for retry on next flush
+          console.error("Error sending log batch:", e);
         });
       }
+    } catch (e) {
+      // Re-queue on synchronous error as well
+      this.batchQueue = batch.concat(this.batchQueue);
+      console.error("Error flushing log batch:", e);
+    }
+  }
+  log(info, callback) {
+    try {
+      const { message } = info;
+      if (typeof message === "object") {
+        console.log(chalk.cyan("protecting against base64 error"));
+        console.log(message);
+        callback();
+        return;
+      }
+      // Add to batch queue instead of sending immediately
+      this.batchQueue.push(message);
+      // Flush if batch is full
+      if (this.batchQueue.length >= this.MAX_BATCH_SIZE) {
+        if (this.batchTimeout) {
+          clearTimeout(this.batchTimeout);
+        }
+        this._flushBatch();
+      } else if (!this.batchTimeout) {
+        // Schedule flush after interval
+        this.batchTimeout = setTimeout(() => this._flushBatch(), this.BATCH_INTERVAL_MS);
+      }
     } catch (e) {
       console.error("Error in CustomTransport log method:", e);
     }

package/interfaces/vitest-plugin.mjs CHANGED Viewed

@@ -2,6 +2,7 @@ import { execSync } from "child_process";
 import crypto from "crypto";
 import fs from "fs";
 import { createRequire } from "module";
+import os from "os";
 import path from "path";
 import { postOrUpdateTestResults } from "../lib/github-comment.mjs";
 import { setTestRunInfo } from "./shared-test-state.mjs";
@@ -9,6 +10,125 @@ import { setTestRunInfo } from "./shared-test-state.mjs";
 // Use createRequire to import CommonJS modules without esbuild processing
 const require = createRequire(import.meta.url);
+// Import Sentry for error reporting
+const Sentry = require("@sentry/node");
+// Track if Sentry has been initialized
+let sentryInitialized = false;
+/**
+ * Initialize Sentry for test failure reporting
+ * Uses same configuration as lib/sentry.js for consistency
+ */
+function initializeSentry() {
+  if (sentryInitialized) return;
+  // Respect telemetry opt-out
+  if (process.env.TD_TELEMETRY === "false") {
+    return;
+  }
+  try {
+    const version = resolveTestDriverVersion() || "unknown";
+    Sentry.init({
+      dsn:
+        process.env.SENTRY_DSN ||
+        "https://452bd5a00dbd83a38ee8813e11c57694@o4510262629236736.ingest.us.sentry.io/4510480443637760",
+      environment: "vitest",
+      release: `testdriverai@${version}`,
+      sampleRate: 1.0,
+      tracesSampleRate: 1.0,
+      enableLogs: true,
+      integrations: [Sentry.httpIntegration(), Sentry.nodeContextIntegration()],
+      initialScope: {
+        tags: {
+          platform: os.platform(),
+          arch: os.arch(),
+          nodeVersion: process.version,
+          runner: "vitest",
+        },
+      },
+      // Don't send user-cancelled errors
+      beforeSend(event, hint) {
+        const error = hint.originalException;
+        if (error && error.message && error.message.includes("User cancelled")) {
+          return null;
+        }
+        return event;
+      },
+    });
+    sentryInitialized = true;
+    logger.debug("Sentry initialized for vitest");
+  } catch (err) {
+    // Sentry init failed - continue without it
+    logger.debug("Failed to initialize Sentry:", err.message);
+  }
+}
+/**
+ * Capture a test failure in Sentry
+ * @param {Object} params - Test failure parameters
+ * @param {string} params.testName - Name of the test
+ * @param {string} params.testFile - File path of the test
+ * @param {string} params.errorMessage - Error message
+ * @param {string} [params.errorStack] - Error stack trace
+ * @param {string} [params.sessionId] - Session ID if available
+ * @param {string} [params.platform] - Platform (windows, mac, linux)
+ * @param {number} [params.duration] - Test duration in ms
+ */
+function captureTestFailure({ testName, testFile, errorMessage, errorStack, sessionId, platform, duration }) {
+  if (!sentryInitialized || process.env.TD_TELEMETRY === "false") return;
+  try {
+    // Create an error object with the test failure details
+    const error = new Error(errorMessage);
+    error.name = "TestFailure";
+    if (errorStack) {
+      error.stack = errorStack;
+    }
+    Sentry.withScope((scope) => {
+      scope.setTag("test.name", testName);
+      scope.setTag("test.file", testFile);
+      scope.setTag("test.status", "failed");
+      if (sessionId) {
+        scope.setTag("session", sessionId);
+      }
+      if (platform) {
+        scope.setTag("platform", platform);
+      }
+      scope.setContext("test", {
+        name: testName,
+        file: testFile,
+        duration: duration,
+        sessionId: sessionId,
+        platform: platform,
+      });
+      Sentry.captureException(error);
+    });
+  } catch (err) {
+    logger.debug("Failed to capture test failure in Sentry:", err.message);
+  }
+}
+/**
+ * Flush Sentry events before process exit
+ * @param {number} [timeout=2000] - Timeout in ms
+ */
+async function flushSentry(timeout = 2000) {
+  if (!sentryInitialized) return;
+  try {
+    await Sentry.flush(timeout);
+  } catch (err) {
+    // Ignore flush errors
+  }
+}
 /**
  * Resolve the TestDriver SDK version using multiple strategies.
  * Similar to resolveVitestVersion(), guards against import.meta.url rewriting.
@@ -710,6 +830,9 @@ class TestDriverReporter {
     this.ctx = ctx;
     logger.debug("onInit called - UPDATED VERSION");
+    // Initialize Sentry for error reporting
+    initializeSentry();
     // Store project root for making file paths relative
     pluginState.projectRoot = ctx.config.root || process.cwd();
     logger.debug("Project root:", pluginState.projectRoot);
@@ -936,6 +1059,9 @@ class TestDriverReporter {
     } catch (error) {
       logger.error("Failed to complete test run:", error.message);
       logger.debug("Error stack:", error.stack);
+    } finally {
+      // Flush any pending Sentry events before process exits
+      await flushSentry();
     }
   }
@@ -1028,6 +1154,17 @@ class TestDriverReporter {
         const error = result.errors[0];
         errorMessage = error.message;
         errorStack = error.stack;
+        // Report test failure to Sentry
+        captureTestFailure({
+          testName: test.name,
+          testFile,
+          errorMessage,
+          errorStack,
+          sessionId,
+          platform: platform || pluginState.detectedPlatform,
+          duration,
+        });
       }
       const suiteName = test.suite?.name;

package/lib/core/index.d.ts CHANGED Viewed

@@ -134,6 +134,28 @@ export interface TestDriverOptions {
   analytics?: boolean;
   /**
+   * Cache configuration
+   * Set to false to disable caching entirely.
+   * Set to an object to configure thresholds.
+   * @example { cache: { enabled: true, thresholds: { find: { screen: 0.05, element: 0.8 }, assert: 0.05 } } }
+   */
+  cache?: boolean | {
+    enabled?: boolean;
+    thresholds?: {
+      /** Thresholds for find operations */
+      find?: {
+        /** Pixel diff threshold for screen comparison (0-1, default 0.05 = 5% diff allowed) */
+        screen?: number;
+        /** OpenCV template match threshold for element matching (0-1, default 0.8 = 80% correlation) */
+        element?: number;
+      };
+      /** Pixel diff threshold for assert operations (0-1, default 0.05 = 5% diff allowed) */
+      assert?: number;
+    };
+  };
+  /**
+   * @deprecated Use cache.thresholds instead
    * Cache thresholds for find operations
    */
   cacheThresholds?: {