npm - @testdriverai/agent - Versions diffs - 7.9.103-canary → 7.9.104-canary - Mend

@testdriverai/agent 7.9.103-canary → 7.9.104-canary

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/agent/interface.js +7 -1
package/agent/lib/commands.js +8 -6
package/agent/lib/system.js +60 -6
package/docs/docs.json +16 -1
package/docs/v7/ai/agent.mdx +72 -0
package/docs/v7/ai/mcp.mdx +228 -0
package/docs/v7/ai/skills.mdx +73 -0
package/docs/v7/find.mdx +2 -0
package/interfaces/cli/commands/init.js +81 -2
package/lib/init-project.js +57 -28
package/lib/install-clients.js +470 -0
package/mcp-server/dist/server.mjs +245 -66
package/mcp-server/src/server.ts +250 -32
package/package.json +1 -1
package/sdk.js +14 -12

package/agent/interface.js CHANGED Viewed

@@ -10,7 +10,13 @@ function createCommandDefinitions(agent) {
     init: {
       description: "Initialize a new TestDriver project with Vitest SDK examples",
       args: {},
-      flags: {},
+      flags: {
+        client: Flags.string({
+          description:
+            "AI client(s) to install into (comma-separated, or 'all'). e.g. --client claude-code,cursor. Omit for an interactive picker.",
+          multiple: false,
+        }),
+      },
       handler: async () => {
         // This handler is special - it doesn't need an agent instance
         // It just scaffolds files, so it will be handled by the CLI command

package/agent/lib/commands.js CHANGED Viewed

@@ -302,10 +302,12 @@ const createCommands = (
       `🔍 assert() threshold: ${threshold} (cache ${threshold < 0 ? "DISABLED" : "ENABLED"}${cacheKey ? `, cacheKey: ${cacheKey.substring(0, 8)}...` : ""})`,
     );
-    // Use v7 endpoint for assert with caching support
+    // Use v7 endpoint for assert with caching support.
+    // captureScreenImage returns { imageKey } (fast S3-key path) or { image }
+    // (base64 fallback) — see system.captureScreenImage.
     let response = await sdk.req("assert", {
       expect: assertion,
-      image: await system.captureScreenBase64(),
+      ...(await system.captureScreenImage()),
       threshold,
       cacheKey,
       os,
@@ -815,7 +817,7 @@ const createCommands = (
       let response = await sdk.req("find", {
         element: description,
-        image: await system.captureScreenBase64(),
+        ...(await system.captureScreenImage()),
       });
       if (!response || !response.coordinates) {
@@ -855,7 +857,7 @@ const createCommands = (
       let response = await sdk.req("find", {
         element: description,
-        image: await system.captureScreenBase64(),
+        ...(await system.captureScreenImage()),
       });
       if (!response || !response.coordinates) {
@@ -1211,7 +1213,7 @@ const createCommands = (
       while (durationPassed < timeout && !passed) {
         const response = await sdk.req("find", {
           element: text,
-          image: await system.captureScreenBase64(),
+          ...(await system.captureScreenImage()),
         });
         passed = !!(response && response.coordinates);
@@ -1304,7 +1306,7 @@ const createCommands = (
       while (scrollDistance <= maxDistance && !passed) {
         const response = await sdk.req("find", {
           element: text,
-          image: await system.captureScreenBase64(),
+          ...(await system.captureScreenImage()),
         });
         passed = !!(response && response.coordinates);

package/agent/lib/system.js CHANGED Viewed

@@ -53,15 +53,28 @@ const createSystem = (emitter, sandbox, config) => {
     return Buffer.from(imageResponse.data).toString("base64");
   };
-  const screenshot = async (options) => {
+  // Capture a screenshot from the runner. Returns the raw runner response,
+  // which is one of:
+  //   { s3Key, width, height } — runner uploaded to S3 (Ably 64KB limit)
+  //   { base64 }               — direct/local connection, bytes inline
+  const captureRaw = async () => {
+    return await sandbox.send({
+      type: "system.screenshot",
+    });
+  };
+  const screenshot = async (options, rawResponse) => {
     const MAX_RETRIES = 3;
     let lastError;
     for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
       try {
-        let response = await sandbox.send({
-          type: "system.screenshot",
-        });
+        // Reuse a response captured by the caller (so the key path and the
+        // base64 path don't each trigger a separate runner capture); otherwise
+        // capture fresh.
+        let response = attempt === 0 && rawResponse
+          ? rawResponse
+          : await captureRaw();
         let base64;
@@ -112,7 +125,7 @@ const createSystem = (emitter, sandbox, config) => {
     return path.join(os.tmpdir(), `td-${Date.now()}-${randomUUID().slice(0, 8)}-${countImages}.png`);
   };
-  const captureAndResize = async (scale = 1, silent = false, mouse = false) => {
+  const captureAndResize = async (scale = 1, silent = false, mouse = false, rawResponse = null) => {
     try {
       if (!silent) {
         emitter.emit(events.screenCapture.start, {
@@ -125,7 +138,7 @@ const createSystem = (emitter, sandbox, config) => {
       let step1 = tmpFilename();
       let step2 = tmpFilename();
-      await screenshot({ filename: step1, format: "png" });
+      await screenshot({ filename: step1, format: "png" }, rawResponse);
       // Load the screenshot image with Jimp
       let image = await Jimp.read(step1);
@@ -187,6 +200,46 @@ const createSystem = (emitter, sandbox, config) => {
     return await captureAndResize(scale, silent, mouse);
   };
+  // Build the image payload to send to the API for a command (find/assert/etc).
+  //
+  // Fast path: when the runner uploaded the screenshot to S3 and it was already
+  // captured at the requested resolution, return { imageKey } so the API reads
+  // the bytes straight from S3 by key. This skips the redundant round-trip the
+  // base64 path pays per command — SDK download from S3, Jimp re-encode, then a
+  // re-upload on the API side.
+  //
+  // Slow path (fallback): when bytes are inline (local/direct connection), when
+  // a mouse cursor must be composited, when scale != 1, or when the captured
+  // size differs from TD_RESOLUTION (so a resize is actually required), fall
+  // back to capturing + resizing locally and return { image } (base64).
+  const captureScreenImage = async (scale = 1, silent = false, mouse = false) => {
+    const raw = await captureRaw();
+    const [targetW, targetH] = config.TD_RESOLUTION || [];
+    const canUseKey =
+      raw &&
+      raw.s3Key &&
+      !mouse &&
+      scale === 1 &&
+      typeof raw.width === "number" &&
+      typeof raw.height === "number" &&
+      raw.width === targetW &&
+      raw.height === targetH;
+    if (canUseKey) {
+      if (!silent) {
+        emitter.emit(events.screenCapture.start, { scale, silent, display: primaryDisplay });
+        emitter.emit(events.screenCapture.end, { scale, silent, display: primaryDisplay });
+      }
+      return { imageKey: raw.s3Key };
+    }
+    // Fallback: download/resize locally and send base64. Pass the already
+    // captured runner response through so we don't capture the screen twice.
+    const step2 = await captureAndResize(scale, silent, mouse, raw);
+    return { image: fs.readFileSync(step2, "base64") };
+  };
   const platform = () => {
     return "windows";
   };
@@ -213,6 +266,7 @@ const createSystem = (emitter, sandbox, config) => {
   return {
     captureScreenBase64,
     captureScreenPNG,
+    captureScreenImage,
     getMousePosition,
     primaryDisplay,
     activeWin,

package/docs/docs.json CHANGED Viewed

@@ -99,10 +99,25 @@
               }
             ]
           },
+          {
+            "group": "AI",
+            "icon": "robot",
+            "pages": [
+              "/v7/ai/agent",
+              "/v7/ai/skills",
+              "/v7/ai/mcp"
+            ]
+          },
           {
             "group": "Actions",
             "pages": [
-              "/v7/ai",
+              {
+                "group": "ai",
+                "tag": "Beta",
+                "pages": [
+                  "/v7/ai"
+                ]
+              },
               "/v7/assert",
               "/v7/captcha",
               "/v7/click",

package/docs/v7/ai/agent.mdx ADDED Viewed

@@ -0,0 +1,72 @@
+---
+title: "Agent"
+sidebarTitle: "Agent"
+description: "The TestDriver test-creator agent that writes and debugs end-to-end tests for you"
+icon: "robot"
+---
+## Overview
+The **TestDriver agent** is an expert test-creator that runs inside your AI client (Claude Code, Cursor, VS Code, and others). It writes, runs, and debugs real end-to-end tests by driving your app the same way a person would — using AI vision to find elements, click, type, and assert — through the [TestDriver MCP Server](/v7/ai/mcp).
+Unlike a chat assistant that only suggests code, the agent works **iteratively against a live sandbox**: it starts a session, performs each action, writes the generated code to your test file, verifies the result with a screenshot, and reruns the test until it passes.
+## What it does
+- **Builds tests from scratch** using TestDriver [skills](/v7/ai/skills) and best practices.
+- **Drives a live sandbox** via MCP tools (`session_start`, `find`, `click`, `type`, `assert`, `check`, …), getting a screenshot and generated code after every action.
+- **Writes code immediately** to the test file after each successful step — never all at once at the end.
+- **Verifies visually** with `check` to confirm each action did what was intended.
+- **Runs the test itself** with `vitest run` and iterates until it passes reliably.
+- **Shares the run report** — after each run it surfaces the `TESTDRIVER_RUN_URL` so you can watch the recording.
+## Installation
+The agent is installed automatically by `testdriverai init`, alongside the [skills](/v7/ai/skills) and the [MCP server](/v7/ai/mcp):
+```bash
+npx testdriverai init
+```
+During init you'll be asked which AI client(s) to install into. The agent is written to the location each client expects:
+| Client | Agent location |
+| --- | --- |
+| Claude Code | `.claude/agents/testdriver.md` |
+| VS Code (Copilot) | `.github/agents/testdriver.agent.md` |
+| Cursor | `.cursor/rules/testdriver.mdc` |
+| Windsurf | `.windsurf/rules/testdriver.md` |
+| Codex | `AGENTS.md` |
+| Zed | `.rules` |
+To install for a specific client without the interactive picker:
+```bash
+npx testdriverai init --client claude-code
+# or several at once
+npx testdriverai init --client claude-code,cursor,vscode
+# or everything
+npx testdriverai init --client all
+```
+See the [MCP Server](/v7/ai/mcp) page for the full client matrix, including web-based clients (Lovable, Replit, v0) that require a few manual steps.
+## Prerequisites
+You need a TestDriver API key. Create one at [console.testdriver.ai/team](https://console.testdriver.ai/team); `init` will save it to `.env` as `TD_API_KEY`.
+## Using the agent
+Once installed, invoke it from your client's chat:
+```text
+@testdriver write a test that logs in and verifies the dashboard loads
+```
+The agent will spin up a sandbox, perform the steps live, write them into a test file under `tests/`, and run it for you.
+## Related
+- [Skills](/v7/ai/skills) — the building blocks the agent composes tests from
+- [MCP Server](/v7/ai/mcp) — the tools the agent uses to drive your app
+- [Generating tests](/v7/generating-tests) — how test generation works end-to-end

package/docs/v7/ai/mcp.mdx ADDED Viewed

@@ -0,0 +1,228 @@
+---
+title: "MCP Server"
+sidebarTitle: "MCP Server"
+description: "Install the TestDriver MCP server in any AI client — Claude Code, Cursor, VS Code, Windsurf, Codex, Zed, and more"
+icon: "plug"
+---
+## Overview
+The **TestDriver MCP server** exposes TestDriver's computer-use tools — `session_start`, `find`, `click`, `type`, `assert`, `check`, `screenshot`, and more — over the [Model Context Protocol](https://modelcontextprotocol.io). Any MCP-capable AI client can use it to drive a live sandbox and write real end-to-end tests.
+It runs as a local stdio process:
+```bash
+npx -p testdriverai testdriverai-mcp
+```
+and authenticates with your `TD_API_KEY` (from [console.testdriver.ai/team](https://console.testdriver.ai/team)).
+## Quick install (recommended)
+`testdriverai init` wires up the MCP server, the [agent](/v7/ai/agent), and the [skills](/v7/ai/skills) for you, writing each client's config in the exact format and location it expects:
+```bash
+# interactive — pick your client(s)
+npx testdriverai init
+# one client
+npx testdriverai init --client claude-code
+# several
+npx testdriverai init --client claude-code,cursor,vscode
+# everything
+npx testdriverai init --client all
+```
+<Info>
+`init` detects clients already present in your project and pre-selects them in the picker. Re-running `init` is safe — it merges the TestDriver entry into existing config without overwriting your other servers.
+</Info>
+## Client support matrix
+| Client | Auto-install | MCP config file | Config key |
+| --- | --- | --- | --- |
+| Claude Code | ✅ | `.mcp.json` | `mcpServers` |
+| Claude Desktop | ✅ | OS-specific (see below) | `mcpServers` |
+| Cursor | ✅ | `.cursor/mcp.json` | `mcpServers` |
+| VS Code (Copilot) | ✅ | `.vscode/mcp.json` | `servers` |
+| Windsurf | ✅ | `~/.codeium/windsurf/mcp_config.json` | `mcpServers` |
+| Codex | ✅ | `~/.codex/config.toml` | `[mcp_servers]` |
+| Zed | ✅ | `.zed/settings.json` | `context_servers` |
+| Lovable | ⚙️ partial | GitHub `AGENTS.md` + UI | — |
+| Replit | ⚙️ partial | `replit.md` + UI | — |
+| v0 (Vercel) | 📝 manual | web UI only | — |
+<Note>
+Each client uses a **different top-level key** for MCP servers. The most common mistake when configuring by hand is using `mcpServers` for VS Code (it wants `servers`), Codex (TOML `[mcp_servers]`), or Zed (`context_servers`).
+</Note>
+## Manual installation
+If you'd rather configure by hand, use the snippets below. The stdio command is the same everywhere; only the wrapper key and file location change.
+<Tabs>
+  <Tab title="Claude Code">
+    Add to `.mcp.json` at your project root (or `~/.claude.json` for all projects):
+    ```json
+    {
+      "mcpServers": {
+        "testdriver": {
+          "type": "stdio",
+          "command": "npx",
+          "args": ["-p", "testdriverai", "testdriverai-mcp"],
+          "env": { "TD_API_KEY": "${TD_API_KEY}" }
+        }
+      }
+    }
+    ```
+  </Tab>
+  <Tab title="Claude Desktop">
+    Edit the Claude Desktop config file:
+    - **macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json`
+    - **Windows:** `%APPDATA%\Claude\claude_desktop_config.json`
+    - **Linux:** `~/.config/Claude/claude_desktop_config.json`
+    ```json
+    {
+      "mcpServers": {
+        "testdriver": {
+          "command": "npx",
+          "args": ["-p", "testdriverai", "testdriverai-mcp"],
+          "env": { "TD_API_KEY": "your_api_key" }
+        }
+      }
+    }
+    ```
+    Restart Claude Desktop after saving.
+  </Tab>
+  <Tab title="Cursor">
+    Add to `.cursor/mcp.json` (project) or `~/.cursor/mcp.json` (global):
+    ```json
+    {
+      "mcpServers": {
+        "testdriver": {
+          "type": "stdio",
+          "command": "npx",
+          "args": ["-p", "testdriverai", "testdriverai-mcp"],
+          "env": { "TD_API_KEY": "${TD_API_KEY}" }
+        }
+      }
+    }
+    ```
+  </Tab>
+  <Tab title="VS Code">
+    Add to `.vscode/mcp.json`. VS Code uses the `servers` key and an `inputs` prompt for secrets:
+    ```json
+    {
+      "servers": {
+        "testdriver": {
+          "type": "stdio",
+          "command": "npx",
+          "args": ["-p", "testdriverai", "testdriverai-mcp"],
+          "env": { "TD_API_KEY": "${input:testdriver-api-key}" }
+        }
+      },
+      "inputs": [
+        {
+          "type": "promptString",
+          "id": "testdriver-api-key",
+          "description": "TestDriver API Key From https://console.testdriver.ai/team",
+          "password": true
+        }
+      ]
+    }
+    ```
+  </Tab>
+  <Tab title="Windsurf">
+    Windsurf reads MCP config globally. Add to `~/.codeium/windsurf/mcp_config.json`:
+    ```json
+    {
+      "mcpServers": {
+        "testdriver": {
+          "command": "npx",
+          "args": ["-p", "testdriverai", "testdriverai-mcp"],
+          "env": { "TD_API_KEY": "${TD_API_KEY}" }
+        }
+      }
+    }
+    ```
+  </Tab>
+  <Tab title="Codex">
+    Codex uses TOML. Add to `~/.codex/config.toml`:
+    ```toml
+    [mcp_servers.testdriver]
+    command = "npx"
+    args = ["-p", "testdriverai", "testdriverai-mcp"]
+    env = { TD_API_KEY = "${TD_API_KEY}" }
+    ```
+  </Tab>
+  <Tab title="Zed">
+    Zed calls them "context servers". Add to `.zed/settings.json` (project) or `~/.config/zed/settings.json` (global):
+    ```json
+    {
+      "context_servers": {
+        "testdriver": {
+          "command": "npx",
+          "args": ["-p", "testdriverai", "testdriverai-mcp"],
+          "env": { "TD_API_KEY": "${TD_API_KEY}" }
+        }
+      }
+    }
+    ```
+  </Tab>
+</Tabs>
+## Web-based clients
+Lovable, Replit, and v0 run in the browser, so the MCP server can't be launched as a local process. Configure them through each product's UI.
+<AccordionGroup>
+  <Accordion title="Lovable">
+    1. Connect your GitHub repo and run `npx testdriverai init --client lovable` — this writes `AGENTS.md` and the skills into the repo so Lovable's agent picks them up.
+    2. In Lovable, open **Settings → MCP** and add the TestDriver server.
+  </Accordion>
+  <Accordion title="Replit">
+    1. Run `npx testdriverai init --client replit` to write `replit.md` with the TestDriver agent guidance.
+    2. In Replit, open **Tools → Integrations → MCP** and add a custom MCP server.
+  </Accordion>
+  <Accordion title="v0 (Vercel)">
+    v0 is fully UI-driven and does not read repo files.
+    1. Open **[v0.app/chat/settings/mcp-connections](https://v0.app/chat/settings/mcp-connections)** and add the TestDriver MCP connection.
+    2. Paste the agent guidance into **Instructions** (the **+** in the prompt bar).
+  </Accordion>
+</AccordionGroup>
+## Verifying the install
+After installing, open your client's chat and ask the agent to write a test:
+```text
+@testdriver write a test that opens the homepage and asserts the title
+```
+If the MCP server is wired up correctly, the agent will start a session and you'll see screenshots come back as it works. If tools don't appear, check that `TD_API_KEY` is set and restart the client.
+## Related
+- [Agent](/v7/ai/agent) — the test-creator that uses these tools
+- [Skills](/v7/ai/skills) — instruction files describing each tool
+- [CI/CD](/v7/ci-cd) — running the generated tests in your pipeline

package/docs/v7/ai/skills.mdx ADDED Viewed

@@ -0,0 +1,73 @@
+---
+title: "Skills"
+sidebarTitle: "Skills"
+description: "Composable instruction files that teach any AI client how to use TestDriver"
+icon: "puzzle-piece"
+---
+## Overview
+**Skills** are small, focused instruction files — one per TestDriver capability — that teach your AI client exactly how to use each part of the TestDriver SDK and MCP tools. They follow the [Anthropic `SKILL.md` format](https://code.claude.com/docs/en/skills): a folder per skill, each containing a `SKILL.md` with YAML frontmatter and a markdown body.
+There are **106 skills**, generated directly from the TestDriver documentation, covering every action and concept: `find`, `click`, `type`, `assert`, `check`, `scroll`, `press-keys`, `provision`, caching, secrets, CI/CD, and more.
+```text
+.claude/skills/
+├── testdriver-click/
+│   └── SKILL.md
+├── testdriver-find/
+│   └── SKILL.md
+├── testdriver-assert/
+│   └── SKILL.md
+└── … (103 more)
+```
+Each `SKILL.md` looks like:
+```markdown
+---
+name: testdriver:click
+description: Click at specific coordinates or on elements
+---
+## Element Click
+When called on an Element object, clicks on the located element.
+…
+```
+## Why skills
+The [agent](/v7/ai/agent) is general; skills are specific. When the agent needs to perform an action — say, locate an element — it pulls in the `testdriver:find` skill, which contains the exact syntax, options, return shape, and gotchas for that one method. This keeps the agent accurate without bloating its base prompt, and lets clients load only the skills relevant to the current step.
+## Installation
+Skills are installed by `testdriverai init` along with the [agent](/v7/ai/agent) and [MCP server](/v7/ai/mcp):
+```bash
+npx testdriverai init
+```
+They're written to the skills directory each client expects:
+| Client | Skills location |
+| --- | --- |
+| Claude Code | `.claude/skills/<name>/SKILL.md` |
+| Zed | `.agents/skills/<name>/SKILL.md` |
+| Codex | referenced from `AGENTS.md` |
+| VS Code · Cursor · Windsurf | folded into the agent rules/instructions |
+Clients without a native skills concept (Cursor, VS Code, Windsurf) still get the agent definition, which references the same guidance inline.
+## Authoring & regenerating
+Skills are **generated, not hand-edited** — each is built from an `.mdx` page in the docs. Do not edit `SKILL.md` files directly (they carry a `DO NOT EDIT` marker). To change a skill, edit the corresponding documentation page and regenerate:
+```bash
+node docs/_scripts/generate-skills.js
+```
+## Related
+- [Agent](/v7/ai/agent) — composes skills into working tests
+- [MCP Server](/v7/ai/mcp) — the tools the skills describe how to use

package/docs/v7/find.mdx CHANGED Viewed

@@ -402,6 +402,8 @@ await element.click();
 ## Cache Options
+When a test completes successfully, the result of each `find()` is cached. On later runs, TestDriver reuses the cached match instead of making a fresh AI call, which significantly speeds up locating the same element. The cache lives in your [dashboard](https://console.testdriver.ai/cache) and is shared across runs — see the [Cache](/v7/cache) page for how matching, thresholds, and invalidation work.
 Control caching behavior to optimize performance, especially when using dynamic variables in prompts.
 ### Custom Cache Key