npm - testdriverai - Versions diffs - 7.9.59-test → 7.9.61-canary - Mend

testdriverai 7.9.59-test → 7.9.61-canary

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/agent/lib/sandbox.js +162 -9
package/ai/skills/testdriver-aws-setup/SKILL.md +1 -1
package/ai/skills/testdriver-ci-cd/SKILL.md +1 -1
package/ai/skills/testdriver-client/SKILL.md +5 -5
package/ai/skills/testdriver-customizing-devices/SKILL.md +1 -1
package/ai/skills/testdriver-debugging-with-screenshots/SKILL.md +14 -0
package/ai/skills/testdriver-device-config/SKILL.md +5 -5
package/ai/skills/testdriver-find/SKILL.md +33 -0
package/ai/skills/testdriver-hosted/SKILL.md +1 -1
package/ai/skills/testdriver-machine-setup/SKILL.md +262 -0
package/ai/skills/testdriver-parse/SKILL.md +1 -1
package/ai/skills/testdriver-reusable-code/SKILL.md +3 -3
package/ai/skills/testdriver-running-tests/SKILL.md +1 -1
package/ai/skills/testdriver-screenshot/SKILL.md +4 -4
package/ai/skills/testdriver-scroll/SKILL.md +25 -25
package/docs/docs.json +1 -0
package/docs/v7/machine-setup.mdx +262 -0
package/examples/config.mjs +0 -1
package/package.json +1 -1
package/sdk.js +1 -1

package/agent/lib/sandbox.js CHANGED

@@ -771,14 +771,160 @@ const createSandbox = function (emitter, analytics, sessionInstance) {
         break; // slot approved and provisioned — exit the while loop
       }
+      // ─── Handle async provisioning status ─────────────────────────────
+      // API may return early with status: 'provisioning' while a background
+      // job is still creating/configuring the sandbox. Prefer Ably control
+      // events for completion and only fall back to authenticate polling when
+      // an event is not observed in time.
+      var provisioningPollStart = Date.now();
+      var provisioningTimeoutMs = 10 * 60 * 1000;
+      while (reply.status === 'provisioning') {
+        var provisionElapsed = Date.now() - provisioningPollStart;
+        if (provisionElapsed >= provisioningTimeoutMs) {
+          var provisioningTimeoutErr = new Error(
+            "Sandbox provisioning timed out after " +
+            Math.round(provisionElapsed / 1000) +
+            "s" +
+            (this._sandboxId || (reply && reply.sandboxId)
+              ? " for sandbox " + (this._sandboxId || (reply && reply.sandboxId))
+              : "") +
+            ". Last known status: provisioning"
+          );
+          provisioningTimeoutErr.responseData = reply;
+          throw provisioningTimeoutErr;
+        }
+        logger.log(
+          'Waiting for sandbox to be ready...'
+        );
+        var self = this;
+        var provisioningEvent = null;
+        if (this._sessionChannel) {
+          provisioningEvent = await new Promise(function (resolve) {
+            var resolved = false;
+            var eventTimeout = 30000;
+            function finish(data) {
+              if (resolved) return;
+              resolved = true;
+              clearTimeout(timer);
+              try { self._sessionChannel.unsubscribe('control', onProvisionCtrl); } catch (_) {}
+              resolve(data || null);
+            }
+            function onProvisionCtrl(msg) {
+              var data = msg && msg.data;
+              if (!data) return;
+              if (data.type === 'provisioning.started') {
+                logger.log((data.message || 'Provisioning started') + (data.os ? ' (' + data.os + ')' : ''));
+                return;
+              }
+              if (data.type === 'provisioning.progress') {
+                var progress = data.message || ('Provisioning step: ' + (data.phase || 'in-progress'));
+                logger.log(progress);
+                return;
+              }
+              if (data.type === 'provisioning.completed' || data.type === 'provisioning.failed') {
+                finish(data);
+              }
+            }
+            var timer = setTimeout(function () {
+              finish(null);
+            }, eventTimeout);
+            if (timer.unref) timer.unref();
+            try {
+              self._sessionChannel.subscribe('control', onProvisionCtrl);
+              // Check recent history to close race window where event was
+              // published before this subscription was attached.
+              self._sessionChannel.history({ limit: 20 }).then(function (page) {
+                if (!page || !page.items || resolved) return;
+                for (var i = 0; i < page.items.length; i++) {
+                  var item = page.items[i];
+                  var data = item && item.data;
+                  if (item && item.name === 'control' && data && (data.type === 'provisioning.completed' || data.type === 'provisioning.failed')) {
+                    finish(data);
+                    return;
+                  }
+                }
+              }).catch(function (err) {
+                logger.warn('Provisioning history lookup failed (non-fatal): ' + (err.message || err));
+              });
+            } catch (subscribeErr) {
+              logger.warn('Provisioning event subscribe failed (non-fatal): ' + (subscribeErr.message || subscribeErr));
+              finish(null);
+            }
+          });
+        }
+        if (provisioningEvent && provisioningEvent.type === 'provisioning.failed') {
+          var eventErr = new Error(
+            provisioningEvent.errorMessage || 'Failed while waiting for sandbox provisioning',
+          );
+          eventErr.responseData = provisioningEvent;
+          throw eventErr;
+        }
+        if (provisioningEvent && provisioningEvent.type === 'provisioning.completed') {
+          // Event carries the final payload shape from the API, so we can stop
+          // polling authenticate in the common case.
+          reply = Object.assign({}, reply, provisioningEvent);
+          if (reply.status === 'provisioning') {
+            reply.status = 'ready';
+          }
+          if (reply.success !== true) {
+            reply.success = true;
+          }
+          break;
+        }
+        await new Promise(function (resolve) {
+          var t = setTimeout(resolve, 10000);
+          if (t.unref) t.unref();
+        });
+        var pollBody = {
+          apiKey: this.apiKey,
+          version: version,
+          os: message.os || this.os || 'linux',
+          session: sessionId,
+          apiRoot: this.apiRoot,
+          sandboxId: this._sandboxId || (reply && reply.sandboxId),
+          slotApproved: true,
+        };
+        if (message.resolution) pollBody.resolution = message.resolution;
+        if (message.ci) pollBody.ci = message.ci;
+        if (message.ami) pollBody.ami = message.ami;
+        if (message.instanceType) pollBody.instanceType = message.instanceType;
+        if (message.e2bTemplateId) pollBody.e2bTemplateId = message.e2bTemplateId;
+        if (message.keepAlive !== undefined) pollBody.keepAlive = message.keepAlive;
+        reply = await this._httpPostWithConcurrencyRetry(
+          "/api/v7/sandbox/authenticate",
+          pollBody,
+          timeout,
+        );
+        if (!reply.success && reply.status !== 'provisioning') {
+          var provisioningErr = new Error(
+            reply.errorMessage || "Failed while waiting for sandbox provisioning",
+          );
+          provisioningErr.responseData = reply;
+          throw provisioningErr;
+        }
+      }
       if (message.type === "create") {
         // E2B (Linux) sandboxes return a url directly.
         // We still need to wait for runner.ready since sandbox-agent.js runs inside E2B.
         const isE2B = !!reply.url;
-        const runnerIp = reply.runner && reply.runner.ip;
-        const noVncPort = reply.runner && reply.runner.noVncPort;
-        const runnerVncUrl = reply.runner && reply.runner.vncUrl;
+        let runnerIp = reply.runner && reply.runner.ip;
+        let noVncPort = reply.runner && reply.runner.noVncPort;
+        let runnerVncUrl = reply.runner && reply.runner.vncUrl;
         // Log image version info (AMI for Windows, E2B template for Linux)
         if (reply.imageVersion) {
@@ -801,12 +947,12 @@ const createSandbox = function (emitter, analytics, sessionInstance) {
         // For presence-based Windows runners (reply.runner already set), the runner
         // is already listening so we can skip the wait.
         var self = this;
-        const needsReadyWait = this._sessionChannel && (isE2B || !reply.runner);
+        const needsReadyWait = this._sessionChannel && (isE2B || !reply.runner || (reply.runner && reply.runner.os === 'windows'));
         if (needsReadyWait) {
           logger.log('Waiting for runner agent to signal readiness...');
-          // E2B (Linux) sandboxes need extra time: S3 upload + npm install can add 60-120s on top of sandbox boot
-          // EC2 (Windows) cold starts can be slow due to AV scanning and native module loading
-          var readyTimeout = isE2B ? 300000 : 180000; // 5 min for E2B (S3+npm), 3 min for EC2
+          // E2B (Linux) sandboxes need extra time: S3 upload + npm install can add 60-120s on top of sandbox boot.
+          // Hosted EC2 (Windows) can also take several minutes when launching/provisioning in background.
+          var readyTimeout = isE2B ? 300000 : 300000; // 5 min for E2B and EC2
           await new Promise(function (resolve, reject) {
             var resolved = false;
             var waitStart = Date.now();
@@ -817,7 +963,7 @@ const createSandbox = function (emitter, analytics, sessionInstance) {
               clearInterval(progressTimer);
               self._sessionChannel.unsubscribe('control', onCtrl);
               // Update runner info if provided
-              if (data && data.os) reply.runner = reply.runner || {};
+              if (data && (data.os || data.ip)) reply.runner = reply.runner || {};
               if (data && data.os && reply.runner) reply.runner.os = data.os;
               if (data && data.ip && reply.runner) reply.runner.ip = data.ip;
               if (data && data.runnerVersion && reply.runner) reply.runner.version = data.runnerVersion;
@@ -902,6 +1048,13 @@ const createSandbox = function (emitter, analytics, sessionInstance) {
             }
           });
         }
+        // Refresh runner metadata after runner.ready wait because the wait handler
+        // can populate reply.runner fields from control messages.
+        runnerIp = reply.runner && reply.runner.ip;
+        noVncPort = reply.runner && reply.runner.noVncPort;
+        runnerVncUrl = reply.runner && reply.runner.vncUrl;
         // Prefer the full vncUrl reported by the runner (infrastructure-agnostic).
         // For E2B sandboxes, use the url from the API reply.
         // Fall back to constructing from ip + noVncPort for older runners.

package/ai/skills/testdriver-aws-setup/SKILL.md CHANGED

@@ -192,7 +192,7 @@ Tests should use `context.ip || process.env.TD_IP` for the IP configuration:
 ```javascript
 import { describe, it } from "vitest";
-import { TestDriver } from "testdriverai/lib/vitest/hooks.mjs";
+import { TestDriver } from "testdriverai/vitest/hooks";
 describe("My Test", () => {
   it("should run on self-hosted instance", async (context) => {

package/ai/skills/testdriver-ci-cd/SKILL.md CHANGED

@@ -571,7 +571,7 @@ When using multi-platform testing, read the `TD_OS` environment variable in your
 ```javascript tests/cross-platform.test.mjs
 import { describe, expect, it } from "vitest";
-import { TestDriver } from "testdriverai/lib/vitest/hooks.mjs";
+import { TestDriver } from "testdriverai/vitest/hooks";
 describe("Cross-platform tests", () => {
   it("should work on both Linux and Windows", async (context) => {

package/ai/skills/testdriver-client/SKILL.md CHANGED

@@ -44,7 +44,7 @@ const testdriver = new TestDriver(apiKey, options)
       Enable or disable console logging
     </ParamField>
-    <ParamField path="autoScreenshots" type="boolean" default="true">
+    <ParamField path="autoScreenshots" type="boolean" default="false">
       Automatically capture screenshots before and after each command. Screenshots are saved to `.testdriver/screenshots/<test>/` with descriptive filenames that include the line number and action name. Format: `<seq>-<action>-<phase>-L<line>-<description>.png`
     </ParamField>
@@ -56,10 +56,6 @@ const testdriver = new TestDriver(apiKey, options)
       Reconnect to the last used sandbox instead of creating a new one. When `true`, provision methods (`chrome`, `vscode`, `installer`, etc.) will be skipped since the application is already running. Throws error if no previous sandbox exists.
     </ParamField>
-    <ParamField path="keepAlive" type="number" default="60000">
-      Keep sandbox alive for the specified number of milliseconds after disconnect. Set to `0` to terminate immediately on disconnect. Useful for debugging or reconnecting to the same sandbox.
-    </ParamField>
     <ParamField path="preview" type="string" default="browser">
       Preview mode for live test visualization:
       - `"browser"` — Opens debugger in default browser (default)
@@ -274,6 +270,10 @@ await testdriver.connect(options)
     <ParamField path="headless" type="boolean" default="false">
       **Deprecated**: Use `preview: "none"` instead. Run in headless mode without opening the debugger.
     </ParamField>
+    <ParamField path="keepAlive" type="number" default="60000">
+      Keep sandbox alive for the specified number of milliseconds after disconnect. Set to `0` to terminate immediately on disconnect. Useful for debugging or reconnecting to the same sandbox.
+    </ParamField>
   </Expandable>
 </ParamField>

package/ai/skills/testdriver-customizing-devices/SKILL.md CHANGED

@@ -39,7 +39,7 @@ const testdriver = TestDriver(context, {
   // === Recording & Screenshots ===
   dashcam: true,             // Enable/disable Dashcam video recording (default: true)
-  autoScreenshots: true,     // Capture screenshots before/after each command (default: true)
+  autoScreenshots: true,     // Capture screenshots before/after each command (default: false)
   // === AI Configuration ===
   ai: {                      // Global AI sampling configuration

package/ai/skills/testdriver-debugging-with-screenshots/SKILL.md CHANGED

@@ -328,6 +328,20 @@ Understanding the directory structure helps with efficient screenshot viewing:
 - All screenshots are PNG format
 - Disable automatic screenshots with `autoScreenshots: false` if needed
+## Interaction List Sidebar (Source of Truth)
+When viewing a test run in the TestDriver console, the **interaction list sidebar** displays a screenshot for each interaction call (find, click, type, assert, etc.). These screenshots show exactly what was on the screen at the time each interaction was executed.
+<Note>
+  **The sidebar screenshots are the source of truth.** If a test is behaving unexpectedly, check the screenshot attached to the specific interaction in the sidebar — it shows precisely what the AI saw when making its decision. This is more reliable than inferring screen state from test logs or local screenshots alone.
+</Note>
+Use the interaction list to:
+- **Verify what the AI saw** — confirm the correct page/state was visible when `find()` or `assert()` ran
+- **Debug misclicks** — see whether the target element was actually on screen
+- **Identify timing issues** — spot cases where the UI hadn't finished loading before an interaction fired
+- **Compare runs** — review interaction screenshots across multiple runs to catch flaky behavior
 ## Integration with Test Development
 ### During MCP Interactive Development

package/ai/skills/testdriver-device-config/SKILL.md CHANGED

@@ -28,7 +28,7 @@ await testdriver.provision.chrome({
 ```javascript
 import { describe, expect, it } from "vitest";
-import { TestDriver } from "testdriverai/lib/vitest/hooks.mjs";
+import { TestDriver } from "testdriverai/vitest/hooks";
 describe("Login Flow", () => {
   it("should log in successfully", async (context) => {
@@ -113,7 +113,7 @@ await testdriver.provision.chromeExtension({
 ```javascript
 import { describe, expect, it } from "vitest";
-import { TestDriver } from "testdriverai/lib/vitest/hooks.mjs";
+import { TestDriver } from "testdriverai/vitest/hooks";
 describe("Chrome Extension Test", () => {
   it("should load and interact with extension", async (context) => {
@@ -187,7 +187,7 @@ const filePath = await testdriver.provision.installer({
 ```javascript
 import { describe, expect, it } from "vitest";
-import { TestDriver } from "testdriverai/lib/vitest/hooks.mjs";
+import { TestDriver } from "testdriverai/vitest/hooks";
 describe("Desktop App Test", () => {
   it("should install and launch app", async (context) => {
@@ -209,7 +209,7 @@ describe("Desktop App Test", () => {
 ```javascript
 import { describe, expect, it } from "vitest";
-import { TestDriver } from "testdriverai/lib/vitest/hooks.mjs";
+import { TestDriver } from "testdriverai/vitest/hooks";
 describe("Windows App Test", () => {
   it("should install on Windows", async (context) => {
@@ -276,7 +276,7 @@ await testdriver.provision.vscode({
 ```javascript
 import { describe, expect, it } from "vitest";
-import { TestDriver } from "testdriverai/lib/vitest/hooks.mjs";
+import { TestDriver } from "testdriverai/vitest/hooks";
 describe("VS Code Test", () => {
   it("should open workspace with extensions", async (context) => {

package/ai/skills/testdriver-find/SKILL.md CHANGED

@@ -366,6 +366,39 @@ This two-phase approach gives the AI a higher-resolution view of the target area
   - You need extra precision for closely spaced UI elements
 </Tip>
+## Verify Mode
+Verify mode is **disabled by default**. When enabled, a second AI call checks that the coordinates returned by `find()` actually correspond to the requested element, catching hallucinated or incorrect positions.
+```javascript
+// Enable verification for critical interactions
+const deleteBtn = await testdriver.find('delete account button', { verify: true });
+await deleteBtn.click();
+```
+### How Verify Mode Works
+1. **Phase 1**: AI locates the element and returns coordinates
+2. **Phase 2**: A second AI call examines the screenshot at those coordinates to confirm the element matches the description
+3. **Result**: If verification fails, the find is retried or marked as not found
+### Combining Zoom and Verify
+For maximum accuracy, enable both `zoom` and `verify` together. This is useful for critical interactions where clicking the wrong element could cause cascading failures:
+```javascript
+// Maximum accuracy: zoom for precision + verify to catch hallucinations
+const element = await testdriver.find('small cancel icon next to the subscription', {
+  zoom: true,
+  verify: true,
+});
+await element.click();
+```
+<Warning>
+  Both `zoom` and `verify` add extra AI calls per `find()` invocation, which increases latency and API usage. When both are enabled, each find may make up to 3 AI calls. **Rate limiting may occur** if many find calls use these options in rapid succession. Use them selectively for critical interactions rather than on every find call.
+</Warning>
 ## Cache Options
 Control caching behavior to optimize performance, especially when using dynamic variables in prompts.

package/ai/skills/testdriver-hosted/SKILL.md CHANGED

@@ -75,7 +75,7 @@ To prevent tests from failing due to exceeding your license slot limit, we recom
     ```javascript vitest.config.mjs
     import { defineConfig } from 'vitest/config';
-    import { TestDriver } from 'testdriverai/vitest';
+    import TestDriver from 'testdriverai/vitest';
     export default defineConfig({
       test: {

package/ai/skills/testdriver-machine-setup/SKILL.md ADDED

@@ -0,0 +1,262 @@
+---
+name: testdriver:machine-setup
+description: Configure Linux and Windows sandboxes, persist machines between runs, and install custom software
+---
+<!-- Generated from machine-setup.mdx. DO NOT EDIT. -->
+TestDriver provisions a fresh cloud VM for every test by default. This guide covers how to configure Linux and Windows machines, reduce startup time by keeping machines alive between runs, use provision scripts for repeatable setup, and install custom software on the fly.
+---
+## Linux Machines
+Linux is the default operating system. No extra configuration is required.
+```javascript
+import { describe, expect, it } from "vitest";
+import { TestDriver } from "testdriverai/vitest/hooks";
+describe("My Test", () => {
+  it("runs on Linux", async (context) => {
+    const testdriver = TestDriver(context);
+    await testdriver.provision.chrome({ url: "https://example.com" });
+    const result = await testdriver.assert("the page loaded successfully");
+    expect(result).toBeTruthy();
+  });
+});
+```
+### Common Linux Options
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `os` | string | `"linux"` | Operating system |
+| `resolution` | string | `"1366x768"` | Screen resolution |
+| `e2bTemplateId` | string | — | Custom E2B template ID (see [Self-Hosted](/v7/self-hosted)) |
+| `keepAlive` | number | `60000` | Ms to keep VM alive after disconnect |
+| `reconnect` | boolean | `false` | Reconnect to last used sandbox |
+```javascript
+const testdriver = TestDriver(context, {
+  os: "linux",
+  resolution: "1920x1080",
+  keepAlive: 5 * 60 * 1000, // keep alive 5 minutes
+});
+```
+---
+## Windows Machines
+Set `os: "windows"` to provision a Windows VM instead. Everything else works the same way.
+```javascript
+const testdriver = TestDriver(context, {
+  os: "windows",
+});
+await testdriver.provision.chrome({ url: "https://example.com" });
+```
+Windows sandboxes use EC2 instances and take longer to boot than Linux (E2B) sandboxes — typically 1–3 minutes for a cold start. See [Keeping Machines Alive](#keeping-machines-alive-between-runs) below to avoid this cost on repeated runs.
+### Common Windows Options
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `os` | string | — | Set to `"windows"` |
+| `resolution` | string | `"1366x768"` | Screen resolution |
+| `sandboxAmi` | string | — | Custom AMI ID (self-hosted) |
+| `sandboxInstance` | string | — | EC2 instance type (self-hosted) |
+| `keepAlive` | number | `60000` | Ms to keep VM alive after disconnect |
+| `reconnect` | boolean | `false` | Reconnect to last used sandbox |
+```javascript
+const testdriver = TestDriver(context, {
+  os: "windows",
+  resolution: "1920x1080",
+  keepAlive: 10 * 60 * 1000, // keep alive 10 minutes
+});
+```
+---
+## Keeping Machines Alive Between Runs
+Windows (and Linux) cold starts can be expensive if you're iterating quickly. Use `keepAlive` + `reconnect` to reuse the same VM across multiple test runs.
+### Step 1 — Start the machine with a long `keepAlive`
+```javascript
+// first.test.mjs
+const testdriver = TestDriver(context, {
+  os: "windows",
+  keepAlive: 30 * 60 * 1000, // keep alive 30 minutes after this test ends
+});
+await testdriver.provision.chrome({ url: "https://example.com" });
+// ... your test steps
+```
+When this test finishes, the sandbox stays running for 30 minutes instead of being terminated immediately.
+### Step 2 — Reconnect in subsequent runs
+```javascript
+// second.test.mjs
+const testdriver = TestDriver(context, {
+  os: "windows",
+  reconnect: true, // reads last sandbox ID from disk, skips provisioning
+});
+// provision.chrome() is automatically skipped — Chrome is already open
+await testdriver.find("Sign In button").click();
+```
+When `reconnect: true` is set:
+- The SDK reads the last sandbox ID from a local file via `getLastSandboxId()`
+- All `provision.*` calls are silently skipped since the application is already running
+- An error is thrown if no previous sandbox ID is found
+<Tip>
+  You can also supply a sandbox ID directly: `connect({ sandboxId: "sandbox-abc123" })`. Use `testdriver.getLastSandboxId()` to retrieve the ID of the last sandbox for scripting purposes.
+</Tip>
+### How `keepAlive` works
+`keepAlive` is a duration in milliseconds. After the SDK disconnects, the server keeps the VM running for that long before terminating it. The default is `60000` (1 minute). Set it to `0` to terminate immediately on disconnect.
+```javascript
+const testdriver = TestDriver(context, {
+  keepAlive: 0,           // terminate immediately
+  // keepAlive: 60000,    // default — 1 minute
+  // keepAlive: 600000,   // 10 minutes
+  // keepAlive: 3600000,  // 1 hour
+});
+```
+<Warning>
+  Machines kept alive beyond your test session continue to consume credits. Always set a `keepAlive` value appropriate for your workflow.
+</Warning>
+---
+## Using Provision Scripts
+Provision scripts let you run arbitrary setup steps before your test starts — downloading fixtures, seeding a database, configuring environment variables, and more. Use `testdriver.exec()` to run shell or PowerShell commands directly in the sandbox.
+<Card
+  title="exec() Reference"
+  icon="terminal"
+  href="/v7/exec"
+>
+  Full reference for running shell and PowerShell commands in the sandbox.
+</Card>
+### Linux setup script
+```javascript
+await testdriver.provision.chrome({ url: "https://myapp.com" });
+// Run a setup script from your repo
+await testdriver.exec("sh", `
+  curl -s https://myapp.com/api/reset-test-db -X POST
+  echo "Test DB reset"
+`, 30000);
+```
+### Windows setup script (PowerShell)
+```javascript
+await testdriver.provision.chrome({ url: "https://myapp.com" });
+await testdriver.exec("pwsh", `
+  $env:API_URL = "https://staging.myapp.com"
+  Write-Host "Environment configured"
+`, 15000);
+```
+### Clone a repo and run a script
+```javascript
+await testdriver.exec("sh", `
+  git clone https://github.com/myorg/test-fixtures.git /tmp/fixtures
+  bash /tmp/fixtures/seed.sh
+`, 120000);
+```
+---
+## Installing Custom Software
+You can install software at the start of a test using `exec()`. This works for any package available via `apt`, `brew`, `choco`, `winget`, npm, pip, or direct download.
+### Linux — apt packages
+```javascript
+await testdriver.exec("sh", `
+  sudo apt-get update -qq
+  sudo apt-get install -y ffmpeg imagemagick
+`, 120000);
+```
+### Linux — Node.js tools
+```javascript
+await testdriver.exec("sh", "npm install -g @playwright/test", 60000);
+```
+### Windows — winget
+```javascript
+await testdriver.exec("pwsh", `
+  winget install --id=7zip.7zip -e --silent
+`, 120000);
+```
+### Windows — Chocolatey
+```javascript
+await testdriver.exec("pwsh", `
+  choco install googlechrome --yes --no-progress
+`, 180000);
+```
+### Download and run an installer
+```javascript
+// Linux
+await testdriver.exec("sh", `
+  curl -L https://example.com/installer.sh -o /tmp/installer.sh
+  chmod +x /tmp/installer.sh
+  /tmp/installer.sh --silent
+`, 300000);
+// Windows
+await testdriver.exec("pwsh", `
+  Invoke-WebRequest -Uri "https://example.com/installer.exe" -OutFile "$env:TEMP\\installer.exe"
+  Start-Process "$env:TEMP\\installer.exe" -ArgumentList "/S" -Wait
+`, 300000);
+```
+<Note>
+  Installing software at test start adds to your test duration. For software you use in every test, consider preloading it into a custom VM image via the Enterprise self-hosted plan.
+</Note>
+---
+## Want Software Pre-Installed on Every Machine?
+Installing packages at runtime works well for occasional or lightweight dependencies. But if you're installing the same 5-minute setup on every test run, you're wasting time and credits.
+With the **Self-Hosted Enterprise plan** you get access to our golden VM base image and Packer scripts, so you can bake your applications, dependencies, and configuration directly into a custom AMI. Tests spin up with everything already installed — zero setup time.
+<Card
+  title="Self-Hosted Enterprise"
+  icon="server"
+  href="/v7/self-hosted"
+>
+  Preload software, configure custom hardware, and run unlimited tests with a flat license fee. Our team assists with deployment and setup.
+</Card>

package/ai/skills/testdriver-parse/SKILL.md CHANGED

@@ -120,7 +120,7 @@ console.log(`Found ${buttons.length} buttons`);
 ```javascript
 import { describe, expect, it } from "vitest";
-import { TestDriver } from "testdriverai/lib/vitest/hooks.mjs";
+import { TestDriver } from "testdriverai/vitest/hooks";
 describe("Login Page", () => {
   it("should have expected form elements", async (context) => {

package/ai/skills/testdriver-reusable-code/SKILL.md CHANGED

@@ -49,7 +49,7 @@ Now import and use these helpers in any test:
 ```javascript test/checkout.test.mjs
 import { describe, expect, it } from "vitest";
-import { TestDriver } from "testdriverai/lib/vitest/hooks.mjs";
+import { TestDriver } from "testdriverai/vitest/hooks";
 import { login } from './helpers/auth.js';
 describe("Checkout", () => {
@@ -120,7 +120,7 @@ Use the page object in your tests:
 ```javascript test/auth.test.mjs
 import { describe, expect, it } from "vitest";
-import { TestDriver } from "testdriverai/lib/vitest/hooks.mjs";
+import { TestDriver } from "testdriverai/vitest/hooks";
 import { LoginPage } from './pages/LoginPage.js';
 describe("Authentication", () => {
@@ -190,7 +190,7 @@ export async function setupAuthenticatedSession(testdriver, user = testUsers.reg
 ```javascript test/admin.test.mjs
 import { describe, expect, it } from "vitest";
-import { TestDriver } from "testdriverai/lib/vitest/hooks.mjs";
+import { TestDriver } from "testdriverai/vitest/hooks";
 import { testUsers, testUrls, setupAuthenticatedSession } from './fixtures/index.js';
 describe("Admin Panel", () => {

package/ai/skills/testdriver-running-tests/SKILL.md CHANGED

@@ -101,7 +101,7 @@ Set `maxConcurrency` in your Vitest config to match your license slot limit:
 ```javascript vitest.config.mjs
 import { defineConfig } from 'vitest/config';
-import { TestDriver } from 'testdriverai/vitest';
+import TestDriver from 'testdriverai/vitest';
 export default defineConfig({
   test: {

package/ai/skills/testdriver-screenshot/SKILL.md CHANGED

@@ -9,7 +9,7 @@ description: Capture and save screenshots during test execution
 Capture a screenshot of the current screen and automatically save it to a local file. Screenshots are organized by test file for easy debugging and review.
 <Note>
-  **Automatic Screenshots (Default: Enabled)**: TestDriver automatically captures screenshots before and after every command (click, type, find, etc.). These are saved with descriptive filenames like `001-click-before-L42-submit-button.png` that include the line number from your test file. You can disable this with `autoScreenshots: false` in your TestDriver options.
+  **Automatic Screenshots**: TestDriver can automatically capture screenshots before and after every command (click, type, find, etc.). These are saved with descriptive filenames like `001-click-before-L42-submit-button.png` that include the line number from your test file. Enable this with `autoScreenshots: true` in your TestDriver options.
 </Note>
 ## Syntax
@@ -50,7 +50,7 @@ Screenshots are automatically saved to `.testdriver/screenshots/<test-file-name>
 ### Automatic Screenshot Naming
-When `autoScreenshots` is enabled (default), filenames follow this format:
+When `autoScreenshots` is enabled, filenames follow this format:
 `<seq>-<action>-<phase>-L<line>-<description>.png`
@@ -91,7 +91,7 @@ await testdriver.screenshot("after-click");
 ```javascript
 import { describe, expect, it } from "vitest";
-import { TestDriver } from "testdriverai/lib/vitest/hooks.mjs";
+import { TestDriver } from "testdriverai/vitest/hooks";
 describe("Login Flow", () => {
   it("should log in successfully", async (context) => {
@@ -188,7 +188,7 @@ If an error occurs, the phase will be `error` instead of `after`.
 <AccordionGroup>
   <Accordion title="Let automatic screenshots do the work">
-    With `autoScreenshots: true` (default), you get comprehensive coverage without adding manual `screenshot()` calls. Only add manual screenshots for specific named checkpoints.
+    With `autoScreenshots: true`, you get comprehensive coverage without adding manual `screenshot()` calls. Only add manual screenshots for specific named checkpoints.
   </Accordion>
   <Accordion title="Use screenshots for debugging flaky tests">

package/ai/skills/testdriver-scroll/SKILL.md CHANGED Viewed

@@ -26,7 +26,7 @@ Scroll the page or active element in any direction using mouse wheel or keyboard
 ## Syntax
 ```javascript
-await testdriver.scroll(direction, amount, method)
+await testdriver.scroll(direction, options)
 ```
 ## Parameters
@@ -35,12 +35,12 @@ await testdriver.scroll(direction, amount, method)
   Direction to scroll: `'up'`, `'down'`, `'left'`, `'right'`
 </ParamField>
-<ParamField path="amount" type="number" default="3">
-  Amount to scroll in clicks (scroll wheel units). Each click is roughly 100px in a browser.
-</ParamField>
-<ParamField path="method" type="string" default="mouse">
-  Scroll method: `'mouse'` or `'keyboard'`
+<ParamField path="options" type="object">
+  <Expandable title="properties">
+    <ParamField path="amount" type="number" default="300">
+      Amount to scroll in pixels
+    </ParamField>
+  </Expandable>
 </ParamField>
 ## Returns
@@ -56,33 +56,33 @@ await testdriver.scroll(direction, amount, method)
 await testdriver.scroll();
 // Scroll down 5 pixels
-await testdriver.scroll('down', 5);
+await testdriver.scroll('down', { amount: 5 });
 // Scroll up
 await testdriver.scroll('up');
 // Scroll up 2 pixels
-await testdriver.scroll('up', 2);
+await testdriver.scroll('up', { amount: 2 });
 ```
 ### Horizontal Scrolling
 ```javascript
 // Scroll right
-await testdriver.scroll('right', 3);
+await testdriver.scroll('right', { amount: 3 });
 // Scroll left
-await testdriver.scroll('left', 3);
+await testdriver.scroll('left', { amount: 3 });
 ```
 ### Scroll Methods
 ```javascript
-// Mouse wheel scroll (smooth)
-await testdriver.scroll('down', 3, 'mouse');
+// Mouse wheel scroll (default)
+await testdriver.scroll('down', { amount: 3 });
-// Keyboard scroll (uses Page Down/Up, more compatible)
-await testdriver.scroll('down', 3, 'keyboard');
+// For keyboard-based scrolling, use pressKeys instead
+await testdriver.pressKeys(['pagedown']);
 ```
 ## Best Practices
@@ -101,7 +101,7 @@ await testdriver.scroll('down', 3, 'keyboard');
   // await testdriver.find('page background').click();
   // Now scroll will work properly
-  await testdriver.scroll('down', 3);
+  await testdriver.scroll('down');
   // If scroll still doesn't work, use Page Down directly
   // await testdriver.pressKeys(['pagedown']);
@@ -109,14 +109,14 @@ await testdriver.scroll('down', 3, 'keyboard');
 </Check>
 <Check>
-  **Choose the right scroll method**
+  **Control scroll distance with the options object**
   ```javascript
-  // For web pages, mouse scroll is usually smoother
-  await testdriver.scroll('down', 3, 'mouse');
+  // For web pages, mouse scroll works well
+  await testdriver.scroll('down', { amount: 3 });
-  // For desktop apps or when mouse doesn't work
-  await testdriver.scroll('down', 3, 'keyboard');
+  // For desktop apps or when mouse doesn't work, use keyboard
+  await testdriver.pressKeys(['pagedown']);
   ```
 </Check>
@@ -133,7 +133,7 @@ await testdriver.scroll('down', 3, 'keyboard');
     ```javascript
     // Scroll multiple times for infinite scroll
     for (let i = 0; i < 5; i++) {
-      await testdriver.scroll('down', 5);
+      await testdriver.scroll('down', { amount: 5 });
       await new Promise(r => setTimeout(r, 1000)); // Wait for load
     }
     ```
@@ -142,7 +142,7 @@ await testdriver.scroll('down', 3, 'keyboard');
   <Accordion title="Horizontal Gallery">
     ```javascript
     // Navigate horizontal carousel
-    await testdriver.scroll('right', 3);
+    await testdriver.scroll('right', { amount: 3 });
     await new Promise(r => setTimeout(r, 500));
     const nextImage = await testdriver.find('next image in carousel');
@@ -174,7 +174,7 @@ describe('Scrolling', () => {
     await testdriver.focusApplication('Google Chrome');
     // Scroll down the page
-    await testdriver.scroll('down', 5);
+    await testdriver.scroll('down', { amount: 5 });
     // Click footer link
     const privacyLink = await testdriver.find('Privacy Policy link');
@@ -188,7 +188,7 @@ describe('Scrolling', () => {
     // Scroll multiple times to load content
     for (let i = 0; i < 3; i++) {
-      await testdriver.scroll('down', 5);
+      await testdriver.scroll('down', { amount: 5 });
       await new Promise(r => setTimeout(r, 1500)); // Wait for load
     }

package/docs/docs.json CHANGED Viewed

@@ -68,6 +68,7 @@
             "pages": [
               "/v7/generating-tests",
               "/v7/device-config",
+               "/v7/machine-setup",
               "/v7/locating-elements",
               "/v7/waiting-for-elements",
               "/v7/performing-actions",

package/docs/v7/machine-setup.mdx ADDED Viewed

@@ -0,0 +1,262 @@
+---
+title: "Machine Setup"
+description: "Configure Linux and Windows sandboxes, persist machines between runs, and install custom software"
+icon: "desktop"
+---
+TestDriver provisions a fresh cloud VM for every test by default. This guide covers how to configure Linux and Windows machines, reduce startup time by keeping machines alive between runs, use provision scripts for repeatable setup, and install custom software on the fly.
+---
+## Linux Machines
+Linux is the default operating system. No extra configuration is required.
+```javascript
+import { describe, expect, it } from "vitest";
+import { TestDriver } from "testdriverai/vitest/hooks";
+describe("My Test", () => {
+  it("runs on Linux", async (context) => {
+    const testdriver = TestDriver(context);
+    await testdriver.provision.chrome({ url: "https://example.com" });
+    const result = await testdriver.assert("the page loaded successfully");
+    expect(result).toBeTruthy();
+  });
+});
+```
+### Common Linux Options
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `os` | string | `"linux"` | Operating system |
+| `resolution` | string | `"1366x768"` | Screen resolution |
+| `e2bTemplateId` | string | — | Custom E2B template ID (see [Self-Hosted](/v7/self-hosted)) |
+| `keepAlive` | number | `60000` | Ms to keep VM alive after disconnect |
+| `reconnect` | boolean | `false` | Reconnect to last used sandbox |
+```javascript
+const testdriver = TestDriver(context, {
+  os: "linux",
+  resolution: "1920x1080",
+  keepAlive: 5 * 60 * 1000, // keep alive 5 minutes
+});
+```
+---
+## Windows Machines
+Set `os: "windows"` to provision a Windows VM instead. Everything else works the same way.
+```javascript
+const testdriver = TestDriver(context, {
+  os: "windows",
+});
+await testdriver.provision.chrome({ url: "https://example.com" });
+```
+Windows sandboxes use EC2 instances and take longer to boot than Linux (E2B) sandboxes — typically 1–3 minutes for a cold start. See [Keeping Machines Alive](#keeping-machines-alive-between-runs) below to avoid this cost on repeated runs.
+### Common Windows Options
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `os` | string | — | Set to `"windows"` |
+| `resolution` | string | `"1366x768"` | Screen resolution |
+| `sandboxAmi` | string | — | Custom AMI ID (self-hosted) |
+| `sandboxInstance` | string | — | EC2 instance type (self-hosted) |
+| `keepAlive` | number | `60000` | Ms to keep VM alive after disconnect |
+| `reconnect` | boolean | `false` | Reconnect to last used sandbox |
+```javascript
+const testdriver = TestDriver(context, {
+  os: "windows",
+  resolution: "1920x1080",
+  keepAlive: 10 * 60 * 1000, // keep alive 10 minutes
+});
+```
+---
+## Keeping Machines Alive Between Runs
+Windows (and Linux) cold starts can be expensive if you're iterating quickly. Use `keepAlive` + `reconnect` to reuse the same VM across multiple test runs.
+### Step 1 — Start the machine with a long `keepAlive`
+```javascript
+// first.test.mjs
+const testdriver = TestDriver(context, {
+  os: "windows",
+  keepAlive: 30 * 60 * 1000, // keep alive 30 minutes after this test ends
+});
+await testdriver.provision.chrome({ url: "https://example.com" });
+// ... your test steps
+```
+When this test finishes, the sandbox stays running for 30 minutes instead of being terminated immediately.
+### Step 2 — Reconnect in subsequent runs
+```javascript
+// second.test.mjs
+const testdriver = TestDriver(context, {
+  os: "windows",
+  reconnect: true, // reads last sandbox ID from disk, skips provisioning
+});
+// provision.chrome() is automatically skipped — Chrome is already open
+await testdriver.find("Sign In button").click();
+```
+When `reconnect: true` is set:
+- The SDK reads the last sandbox ID from a local file via `getLastSandboxId()`
+- All `provision.*` calls are silently skipped since the application is already running
+- An error is thrown if no previous sandbox ID is found
+<Tip>
+  You can also supply a sandbox ID directly: `await testdriver.connect({ sandboxId: "sandbox-abc123" })`. Use `testdriver.getLastSandboxId()` to retrieve the ID of the last sandbox for scripting purposes.
+</Tip>
+### How `keepAlive` works
+`keepAlive` is a duration in milliseconds. After the SDK disconnects, the server keeps the VM running for that long before terminating it. The default is `60000` (1 minute). Note: `keepAlive: 0` currently falls back to the default disconnect grace period rather than terminating immediately, so use a positive duration when you want to control the grace window explicitly.
+```javascript
+const testdriver = TestDriver(context, {
+  keepAlive: 0,           // currently uses the default 1 minute grace period
+  // keepAlive: 60000,    // default — 1 minute
+  // keepAlive: 600000,   // 10 minutes
+  // keepAlive: 3600000,  // 1 hour
+});
+```
+<Warning>
+  Machines kept alive beyond your test session continue to consume credits. Always set a `keepAlive` value appropriate for your workflow.
+</Warning>
+---
+## Using Provision Scripts
+Provision scripts let you run arbitrary setup steps before your test starts — downloading fixtures, seeding a database, configuring environment variables, and more. Use `testdriver.exec()` to run shell or PowerShell commands directly in the sandbox.
+<Card
+  title="exec() Reference"
+  icon="terminal"
+  href="/v7/exec"
+>
+  Full reference for running shell and PowerShell commands in the sandbox.
+</Card>
+### Linux setup script
+```javascript
+await testdriver.provision.chrome({ url: "https://myapp.com" });
+// Run an inline setup script that resets the test database through the app's API
+await testdriver.exec("sh", `
+  curl -s https://myapp.com/api/reset-test-db -X POST
+  echo "Test DB reset"
+`, 30000);
+```
+### Windows setup script (PowerShell)
+```javascript
+await testdriver.provision.chrome({ url: "https://myapp.com" });
+await testdriver.exec("pwsh", `
+  $env:API_URL = "https://staging.myapp.com"
+  Write-Host "Environment configured"
+`, 15000);
+```
+### Clone a repo and run a script
+```javascript
+await testdriver.exec("sh", `
+  git clone https://github.com/myorg/test-fixtures.git /tmp/fixtures
+  bash /tmp/fixtures/seed.sh
+`, 120000);
+```
+---
+## Installing Custom Software
+You can install software at the start of a test using `exec()`. This works for any package available via `apt`, `brew`, `choco`, `winget`, `npm`, `pip`, or direct download.
+### Linux — apt packages
+```javascript
+await testdriver.exec("sh", `
+  sudo apt-get update -qq
+  sudo apt-get install -y ffmpeg imagemagick
+`, 120000);
+```
+### Linux — Node.js tools
+```javascript
+await testdriver.exec("sh", "npm install -g @playwright/test", 60000);
+```
+### Windows — winget
+```javascript
+await testdriver.exec("pwsh", `
+  winget install --id=7zip.7zip -e --silent
+`, 120000);
+```
+### Windows — Chocolatey
+```javascript
+await testdriver.exec("pwsh", `
+  choco install googlechrome --yes --no-progress
+`, 180000);
+```
+### Download and run an installer
+```javascript
+// Linux
+await testdriver.exec("sh", `
+  curl -L https://example.com/installer.sh -o /tmp/installer.sh
+  chmod +x /tmp/installer.sh
+  /tmp/installer.sh --silent
+`, 300000);
+// Windows
+await testdriver.exec("pwsh", `
+  Invoke-WebRequest -Uri "https://example.com/installer.exe" -OutFile "$env:TEMP\\installer.exe"
+  Start-Process "$env:TEMP\\installer.exe" -ArgumentList "/S" -Wait
+`, 300000);
+```
+<Note>
+  Installing software at test start adds to your test duration. For software you use in every test, consider preloading it into a custom VM image via the Enterprise self-hosted plan.
+</Note>
+---
+## Want Software Pre-Installed on Every Machine?
+Installing packages at runtime works well for occasional or lightweight dependencies. But if you're repeating the same 5-minute setup on every test run, you're wasting time and credits.
+With the **Self-Hosted Enterprise plan** you get access to our golden VM base image and Packer scripts, so you can bake your applications, dependencies, and configuration directly into a custom AMI. Tests spin up with everything already installed — zero setup time.
+<Card
+  title="Self-Hosted Enterprise"
+  icon="server"
+  href="/v7/self-hosted"
+>
+  Preload software, configure custom hardware, and run unlimited tests with a flat license fee. Our team assists with deployment and setup.
+</Card>

package/examples/config.mjs CHANGED Viewed

@@ -1,5 +1,4 @@
 export const getDefaults = (context) => ({
   ip: context.ip || process.env.TD_IP,
-  redraw: { enabled: false },
   preview: 'web',
 });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "testdriverai",
-  "version": "7.9.59-test",
+  "version": "7.9.61-canary",
   "description": "Next generation autonomous AI agent for end-to-end testing of web & desktop",
   "main": "sdk.js",
   "types": "sdk.d.ts",

package/sdk.js CHANGED Viewed

@@ -3850,7 +3850,7 @@ CAPTCHA_SOLVER_EOF`,
     const apiKey = this.config?.TD_API_KEY || '';
     const maskedKey = apiKey.length > 4 ? '***' + apiKey.slice(-4) : '(not set)';
     const env = process.env.TD_CHANNEL || process.env.TD_ENV || 'unknown';
-    const os = this.os || this.agent?.cliArgs?.options?.os || process.env.TD_OS || 'linux';
+    const os = this.os || this.agent?.options?.os || process.env.TD_OS || 'linux';
     const sdkVersion = require('./package.json').version;
     // Always print local config immediately