testdriverai 7.5.25 → 7.6.0-test.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/docs/v7/exec.mdx CHANGED
@@ -1,13 +1,13 @@
1
1
  ---
2
2
  title: "exec()"
3
3
  sidebarTitle: "exec"
4
- description: "Execute code or shell commands in the sandbox"
4
+ description: "Execute shell or PowerShell commands in the sandbox"
5
5
  icon: "terminal"
6
6
  ---
7
7
 
8
8
  ## Overview
9
9
 
10
- Execute JavaScript code in the browser or PowerShell commands in the Windows sandbox environment.
10
+ Execute shell commands (Linux) or PowerShell commands (Windows) in the sandbox environment.
11
11
 
12
12
  ## Syntax
13
13
 
@@ -18,7 +18,7 @@ await testdriver.exec(language, code, timeout, silent)
18
18
  ## Parameters
19
19
 
20
20
  <ParamField path="language" type="string" required>
21
- Language to execute: `'js'` (JavaScript) or `'pwsh'` (PowerShell)
21
+ Language to execute: `'sh'` (Shell/Linux) or `'pwsh'` (PowerShell/Windows)
22
22
  </ParamField>
23
23
 
24
24
  <ParamField path="code" type="string" required>
@@ -37,57 +37,40 @@ await testdriver.exec(language, code, timeout, silent)
37
37
 
38
38
  `Promise<string>` - Command output
39
39
 
40
- ## JavaScript Execution
40
+ ## Shell Execution (Linux)
41
41
 
42
- Execute JavaScript in the browser context (Windows sandbox only).
42
+ Execute shell commands in the Linux sandbox.
43
43
 
44
- ### DOM Manipulation
44
+ ### Basic Commands
45
45
 
46
46
  ```javascript
47
- // Click an element via JavaScript
48
- await testdriver.exec('js', `
49
- document.querySelector('#submit-button').click();
50
- `, 5000);
47
+ // List files
48
+ const files = await testdriver.exec('sh', 'ls -la', 5000);
51
49
 
52
- // Fill a form
53
- await testdriver.exec('js', `
54
- document.querySelector('#username').value = 'testuser';
55
- document.querySelector('#password').value = 'password123';
56
- document.querySelector('#login-form').submit();
57
- `, 5000);
50
+ // Check current directory
51
+ const pwd = await testdriver.exec('sh', 'pwd', 5000);
58
52
 
59
- // Scroll to element
60
- await testdriver.exec('js', `
61
- document.querySelector('#footer').scrollIntoView();
62
- `, 5000);
53
+ // Run a script
54
+ await testdriver.exec('sh', './setup.sh', 60000);
63
55
  ```
64
56
 
65
- ### Reading Page Data
57
+ ### File Operations
66
58
 
67
59
  ```javascript
68
- // Get page title
69
- const title = await testdriver.exec('js', 'document.title', 5000);
70
- console.log('Page title:', title);
71
-
72
- // Get all links
73
- const links = await testdriver.exec('js', `
74
- Array.from(document.querySelectorAll('a'))
75
- .map(a => a.href)
76
- .join('\\n')
77
- `, 5000);
60
+ // Create a file
61
+ await testdriver.exec('sh', 'echo "Hello World" > test.txt', 5000);
78
62
 
79
- // Check if element exists
80
- const exists = await testdriver.exec('js', `
81
- document.querySelector('.error-message') !== null
82
- `, 5000);
63
+ // Read a file
64
+ const content = await testdriver.exec('sh', 'cat test.txt', 5000);
83
65
 
84
- // Get element text
85
- const text = await testdriver.exec('js', `
86
- document.querySelector('.notification').textContent
87
- `, 5000);
66
+ // Copy files
67
+ await testdriver.exec('sh', 'cp source.txt dest.txt', 5000);
68
+
69
+ // Delete files
70
+ await testdriver.exec('sh', 'rm test.txt', 5000);
88
71
  ```
89
72
 
90
- ## PowerShell Execution
73
+ ## PowerShell Execution (Windows)
91
74
 
92
75
  Execute PowerShell commands in the Windows sandbox.
93
76
 
@@ -216,7 +199,7 @@ await testdriver.exec('pwsh', '.\\setup.ps1', 60000, true);
216
199
 
217
200
  ```javascript
218
201
  // Quick operations: 5000ms
219
- await testdriver.exec('js', 'document.title', 5000);
202
+ await testdriver.exec('sh', 'ls -la', 5000);
220
203
 
221
204
  // Installations: 30000-60000ms
222
205
  await testdriver.exec('pwsh', 'npm install -g package', 30000);
@@ -289,27 +272,20 @@ describe('Code Execution', () => {
289
272
  await testdriver.disconnect();
290
273
  });
291
274
 
292
- it('should execute JavaScript in browser', async () => {
293
- await testdriver.focusApplication('Google Chrome');
275
+ it('should execute shell commands on Linux', async () => {
276
+ // List directory
277
+ const files = await testdriver.exec('sh', 'ls -la', 5000);
278
+ console.log('Files:', files);
294
279
 
295
- // Get page info via JavaScript
296
- const title = await testdriver.exec('js', 'document.title', 5000);
297
- console.log('Page title:', title);
280
+ // Create a file
281
+ await testdriver.exec('sh', 'echo "Hello World" > test.txt', 5000);
298
282
 
299
- // Manipulate DOM
300
- await testdriver.exec('js', `
301
- document.querySelector('#username').value = 'testuser';
302
- `, 5000);
303
-
304
- // Verify
305
- const value = await testdriver.exec('js', `
306
- document.querySelector('#username').value
307
- `, 5000);
308
-
309
- expect(value).toBe('testuser');
283
+ // Read the file
284
+ const content = await testdriver.exec('sh', 'cat test.txt', 5000);
285
+ expect(content).toContain('Hello World');
310
286
  });
311
287
 
312
- it('should install and use tools', async () => {
288
+ it('should install and use tools on Windows', async () => {
313
289
  // Install tool
314
290
  await testdriver.exec('pwsh', 'npm install -g http-server', 30000, true);
315
291
 
@@ -331,10 +307,6 @@ describe('Code Execution', () => {
331
307
  `, 5000);
332
308
 
333
309
  await testdriver.focusApplication('Google Chrome');
334
-
335
- // Verify page loaded
336
- const content = await testdriver.exec('js', 'document.body.textContent', 5000);
337
- expect(content).toContain('Test Page');
338
310
  });
339
311
  });
340
312
  ```
@@ -342,5 +314,5 @@ describe('Code Execution', () => {
342
314
  ## Related Methods
343
315
 
344
316
  - [`focusApplication()`](/v7/focus-application) - Focus apps before exec
345
- - [`find()`](/v7/find) - Locate elements (alternative to DOM manipulation)
346
- - [`type()`](/v7/type) - Type text (alternative to JS form filling)
317
+ - [`find()`](/v7/find) - Locate elements visually
318
+ - [`type()`](/v7/type) - Type text into inputs
@@ -23,7 +23,7 @@ TestDriver makes it easy to write automated computer-use tests for web browsers,
23
23
  npx testdriverai init
24
24
  ```
25
25
 
26
- This will walk you through creating a new project folder, installing dependencies, and setting up your API key.
26
+ This will walk you through creating a new project folder, installing dependencies, setting up your API key, and configuring MCP for your preferred AI assistant (VS Code, Cursor, Claude Desktop, etc.).
27
27
 
28
28
  </Step>
29
29
 
@@ -1,5 +1,5 @@
1
1
  export const getDefaults = (context) => ({
2
2
  ip: context.ip || process.env.TD_IP,
3
3
  redraw: { enabled: false },
4
- preview: 'ide',
4
+ preview: 'web',
5
5
  });
@@ -1,24 +1,31 @@
1
1
  /**
2
- * TestDriver SDK - Assert Test (Vitest)
3
- * Converted from: testdriver/acceptance/assert.yaml
2
+ * TestDriver SDK - No-Provision Test with Dashcam (Vitest)
3
+ *
4
+ * Demonstrates manual dashcam control without using provision methods.
5
+ * When not using provision.chrome(), provision.vscode(), etc., you need
6
+ * to manually start and stop dashcam recording.
4
7
  */
5
8
 
6
- import { describe, expect, it } from "vitest";
9
+ import { describe, it } from "vitest";
7
10
  import { TestDriver } from "../lib/vitest/hooks.mjs";
8
11
  import { getDefaults } from "./config.mjs";
9
12
 
10
- describe("Assert Test", () => {
11
- it("should assert the testdriver login page shows", async (context) => {
13
+ describe("No-Provision with Dashcam", () => {
14
+ it("should record dashcam while asserting desktop is visible", async (context) => {
12
15
  const testdriver = TestDriver(context, { ...getDefaults(context) });
13
16
 
14
- await testdriver.wait(10000)
17
+ // Start dashcam recording manually (provision methods do this automatically)
18
+ await testdriver.dashcam.start();
15
19
 
16
- // Assert the TestDriver.ai Sandbox login page is displayed
17
- const result = await testdriver.assert(
18
- "A desktop is visible",
19
- );
20
+ await testdriver.exec('sh', 'gedit >/dev/null 2>&1 &'); // Launch gedit in the background so the assertion below has a window to check
20
21
 
21
- expect(result).toBeTruthy();
22
+ await testdriver.assert('untitled document is visible');
23
+
24
+ // Stop dashcam and get the recording URL
25
+ const dashcamUrl = await testdriver.dashcam.stop();
26
+ if (dashcamUrl) {
27
+ console.log(`🎥 Dashcam recording: ${dashcamUrl}`);
28
+ }
22
29
  });
23
30
  });
24
31
 
@@ -12,7 +12,8 @@ const { execSync } = require("child_process");
12
12
  require("dotenv").config();
13
13
 
14
14
  // API configuration
15
- const API_BASE_URL = process.env.TD_API_ROOT || "https://api.testdriver.ai";
15
+ const channelConfig = require("../../../../channel.json");
16
+ const API_BASE_URL = process.env.TD_API_ROOT || channelConfig.channels[channelConfig.active];
16
17
  const POLL_INTERVAL = 5000; // 5 seconds
17
18
  const POLL_TIMEOUT = 900000; // 15 minutes
18
19
 
@@ -9,6 +9,7 @@ import { setTestRunInfo } from "./shared-test-state.mjs";
9
9
 
10
10
  // Use createRequire to import CommonJS modules without esbuild processing
11
11
  const require = createRequire(import.meta.url);
12
+ const channelConfig = require("../channel.json");
12
13
 
13
14
  // Import Sentry for error reporting
14
15
  const Sentry = require("@sentry/node");
@@ -763,7 +764,7 @@ export default function testDriverPlugin(options = {}) {
763
764
  pluginState.apiRoot =
764
765
  options.apiRoot ||
765
766
  process.env.TD_API_ROOT ||
766
- "https://api.testdriver.ai";
767
+ channelConfig.channels[channelConfig.active];
767
768
  pluginState.ciProvider = detectCI();
768
769
  pluginState.gitInfo = getGitInfo();
769
770
 
@@ -822,7 +823,7 @@ class TestDriverReporter {
822
823
  pluginState.apiRoot =
823
824
  this.options.apiRoot ||
824
825
  process.env.TD_API_ROOT ||
825
- "https://api.testdriver.ai";
826
+ channelConfig.channels[channelConfig.active];
826
827
  logger.debug("API key from options:", !!this.options.apiKey);
827
828
  logger.debug("API key from env (at onInit):", !!process.env.TD_API_KEY);
828
829
  logger.debug("API root from options:", this.options.apiRoot);
@@ -1258,6 +1259,26 @@ function getConsoleUrl(apiRoot) {
1258
1259
  return `http://localhost:3001`;
1259
1260
  }
1260
1261
 
1262
+ // Render PR previews: map API service to Web service
1263
+ // canary-api-pr-123.onrender.com -> canary-web-pr-123.onrender.com
1264
+ // testdriver-api-i4m4-pr-123.onrender.com -> web-i4m4-pr-123.onrender.com
1265
+ const renderPrMatch = apiRoot.match(/https:\/\/([\w-]+)-api(-[\w]+)?(-pr-\d+)?\.onrender\.com/);
1266
+ if (renderPrMatch) {
1267
+ const [, prefix, suffix, prSuffix] = renderPrMatch;
1268
+ // Map API naming to Web naming:
1269
+ // canary-api -> canary-web
1270
+ // testdriver-api-i4m4 -> web-i4m4
1271
+ let webPrefix;
1272
+ if (prefix === 'testdriver' && suffix) {
1273
+ // testdriver-api-i4m4 -> web-i4m4
1274
+ webPrefix = 'web' + suffix;
1275
+ } else {
1276
+ // canary-api -> canary-web
1277
+ webPrefix = prefix + '-web';
1278
+ }
1279
+ return `https://${webPrefix}${prSuffix || ''}.onrender.com`;
1280
+ }
1281
+
1261
1282
  // Other tunnels or unknown hosts: return as-is
1262
1283
  return apiRoot;
1263
1284
  }
@@ -80,8 +80,9 @@ class Dashcam {
80
80
  * @private
81
81
  */
82
82
  _getApiRoot() {
83
+ const channelConfig = require("../../channel.json");
83
84
  return (
84
- this.client.config?.TD_API_ROOT || "https://api.testdriver.ai"
85
+ this.client.config?.TD_API_ROOT || channelConfig.channels[channelConfig.active]
85
86
  );
86
87
  }
87
88
 
@@ -91,7 +92,7 @@ class Dashcam {
91
92
  * @param {string} apiRoot - The API root URL
92
93
  * @returns {string} The corresponding console URL
93
94
  */
94
- static getConsoleUrl(apiRoot = "https://api.testdriver.ai") {
95
+ static getConsoleUrl(apiRoot = (() => { const c = require("../../channel.json"); return c.channels[c.active]; })()) {
95
96
  // Allow explicit override via env (e.g. VITE_DOMAIN from .env)
96
97
  if (process.env.VITE_DOMAIN) return process.env.VITE_DOMAIN;
97
98
 
@@ -110,6 +111,26 @@ class Dashcam {
110
111
  return "http://localhost:3001";
111
112
  }
112
113
 
114
+ // Render PR previews: map API service to Web service
115
+ // canary-api-pr-123.onrender.com -> canary-web-pr-123.onrender.com
116
+ // testdriver-api-i4m4-pr-123.onrender.com -> web-i4m4-pr-123.onrender.com
117
+ const renderPrMatch = apiRoot.match(/https:\/\/([\w-]+)-api(-[\w]+)?(-pr-\d+)?\.onrender\.com/);
118
+ if (renderPrMatch) {
119
+ const [, prefix, suffix, prSuffix] = renderPrMatch;
120
+ // Map API naming to Web naming:
121
+ // canary-api -> canary-web
122
+ // testdriver-api-i4m4 -> web-i4m4
123
+ let webPrefix;
124
+ if (prefix === 'testdriver' && suffix) {
125
+ // testdriver-api-i4m4 -> web-i4m4
126
+ webPrefix = 'web' + suffix;
127
+ } else {
128
+ // canary-api -> canary-web
129
+ webPrefix = prefix + '-web';
130
+ }
131
+ return `https://${webPrefix}${prSuffix || ''}.onrender.com`;
132
+ }
133
+
113
134
  // Cloudflare tunnels, custom domains, etc.: the web console is served
114
135
  // from the same origin as the API, so return apiRoot as-is.
115
136
  return apiRoot;
@@ -337,40 +337,80 @@ jobs:
337
337
  progress("⊘ GitHub workflow already exists");
338
338
  }
339
339
 
340
- // 6. Create VSCode MCP config
341
- const vscodeDir = path.join(targetDir, ".vscode");
342
- if (!fs.existsSync(vscodeDir)) {
343
- fs.mkdirSync(vscodeDir, { recursive: true });
344
- }
340
+ // 6. Setup MCP configuration
341
+ // When triggered from the VS Code extension, create .vscode/mcp.json silently
342
+ // When triggered from the CLI, run add-mcp interactively so the user can select their MCP client
343
+ const isVscodeInit = process.env.TD_INIT_SOURCE === "vscode";
344
+
345
+ if (isVscodeInit) {
346
+ // VS Code extension: create .vscode/mcp.json directly
347
+ const vscodeDir = path.join(targetDir, ".vscode");
348
+ if (!fs.existsSync(vscodeDir)) {
349
+ fs.mkdirSync(vscodeDir, { recursive: true });
350
+ }
345
351
 
346
- const mcpConfigFile = path.join(vscodeDir, "mcp.json");
347
- if (!fs.existsSync(mcpConfigFile)) {
348
- const mcpConfig = {
349
- inputs: [
350
- {
351
- type: "promptString",
352
- id: "testdriver-api-key",
353
- description: "TestDriver API Key From https://console.testdriver.ai/team",
354
- password: true,
355
- },
356
- ],
357
- servers: {
358
- testdriver: {
359
- command: "npx",
360
- args: ["-p", "testdriverai", "testdriverai-mcp"],
361
- env: {
362
- TD_API_KEY: "${input:testdriver-api-key}",
352
+ const mcpConfigFile = path.join(vscodeDir, "mcp.json");
353
+ if (!fs.existsSync(mcpConfigFile)) {
354
+ const mcpConfig = {
355
+ inputs: [
356
+ {
357
+ type: "promptString",
358
+ id: "testdriver-api-key",
359
+ description: "TestDriver API Key From https://console.testdriver.ai/team",
360
+ password: true,
361
+ },
362
+ ],
363
+ servers: {
364
+ testdriver: {
365
+ command: "npx",
366
+ args: ["-p", "testdriverai", "testdriverai-mcp"],
367
+ env: {
368
+ TD_API_KEY: "${input:testdriver-api-key}",
369
+ },
363
370
  },
364
371
  },
365
- },
366
- };
367
- fs.writeFileSync(mcpConfigFile, JSON.stringify(mcpConfig, null, 2) + "\n");
368
- progress("✓ Created MCP config: .vscode/mcp.json");
372
+ };
373
+ fs.writeFileSync(mcpConfigFile, JSON.stringify(mcpConfig, null, 2) + "\n");
374
+ progress("✓ Created MCP config: .vscode/mcp.json");
375
+ } else {
376
+ progress("⊘ MCP config already exists");
377
+ }
369
378
  } else {
370
- progress("⊘ MCP config already exists");
379
+ // CLI: use add-mcp for interactive MCP client selection
380
+ progress("🔧 Setting up MCP integration...");
381
+ try {
382
+ const addMcpResult = require("child_process").spawnSync(
383
+ "npx",
384
+ [
385
+ "add-mcp",
386
+ "testdriver",
387
+ "--command",
388
+ "npx -p testdriverai testdriverai-mcp",
389
+ "--env",
390
+ "TD_API_KEY",
391
+ ],
392
+ {
393
+ cwd: targetDir,
394
+ stdio: "inherit", // Pass through stdin/stdout for interactive prompts
395
+ shell: process.platform === "win32",
396
+ }
397
+ );
398
+
399
+ if (addMcpResult.status === 0) {
400
+ progress("✓ MCP configured via add-mcp");
401
+ } else if (addMcpResult.status !== null) {
402
+ progress("⚠ MCP setup skipped or failed - you can run 'npx add-mcp testdriver' later");
403
+ }
404
+ } catch (err) {
405
+ progress("⚠ Could not run add-mcp - you can run 'npx add-mcp testdriver' later");
406
+ }
371
407
  }
372
408
 
373
409
  // 7. Create VSCode extensions recommendations
410
+ const vscodeDir = path.join(targetDir, ".vscode");
411
+ if (!fs.existsSync(vscodeDir)) {
412
+ fs.mkdirSync(vscodeDir, { recursive: true });
413
+ }
374
414
  const extensionsFile = path.join(vscodeDir, "extensions.json");
375
415
  if (!fs.existsSync(extensionsFile)) {
376
416
  const extensionsConfig = {
@@ -22,6 +22,7 @@ import TestDriverSDK from "../../sdk.js";
22
22
 
23
23
  // Use createRequire to import CommonJS modules
24
24
  const require = createRequire(import.meta.url);
25
+ const channelConfig = require("../../channel.json");
25
26
 
26
27
  /**
27
28
  * Minimum required Vitest major version
@@ -255,7 +256,7 @@ async function uploadLogsToReplay(client, dashcamUrl) {
255
256
  }
256
257
 
257
258
  // Use the SDK's configured API root (matches what the SDK uses for all other API calls)
258
- const apiRoot = client.config?.TD_API_ROOT || process.env.TD_API_ROOT || "https://api.testdriver.ai";
259
+ const apiRoot = client.config?.TD_API_ROOT || process.env.TD_API_ROOT || channelConfig.channels[channelConfig.active];
259
260
 
260
261
  console.log(`[TestDriver] Uploading logs for replay ${replayId} to ${apiRoot}...`);
261
262
 
@@ -11,9 +11,19 @@ MCP server that enables AI agents to iteratively build TestDriver tests with vis
11
11
 
12
12
  ## Installation
13
13
 
14
- ### Via npx (Recommended)
14
+ ### Quick Install (Recommended)
15
15
 
16
- No installation needed! Just configure your MCP client to use npx:
16
+ Use `add-mcp` to automatically configure TestDriver for your MCP client:
17
+
18
+ ```bash
19
+ npx add-mcp testdriver
20
+ ```
21
+
22
+ This will prompt you to select your MCP client (VS Code, Cursor, Claude Desktop, etc.) and configure it automatically.
23
+
24
+ ### Manual Configuration
25
+
26
+ If you prefer to configure manually, add the following to your MCP config file:
17
27
 
18
28
  ```json
19
29
  {
@@ -141,7 +141,7 @@ export function generateActionCode(action, args, result) {
141
141
  return `const assertResult = await testdriver.assert("${escapeString(assertion)}");\nexpect(assertResult).toBeTruthy();`;
142
142
  }
143
143
  case "exec": {
144
- const language = args.language || "js";
144
+ const language = args.language || "sh";
145
145
  const code = args.code;
146
146
  const timeout = args.timeout;
147
147
  if (code.includes("\n")) {
@@ -687,6 +687,7 @@ registerAppTool(server, "find", {
687
687
  const duration = Date.now() - startTime;
688
688
  // Store cropped image for resource serving (instead of inline data URL)
689
689
  let croppedImageResourceUri;
690
+ let screenshotResourceUri;
690
691
  const croppedImage = rawResponse.croppedImage;
691
692
  if (croppedImage) {
692
693
  const imageData = croppedImage.startsWith('data:')
@@ -696,6 +697,20 @@ registerAppTool(server, "find", {
696
697
  // Remove croppedImage from response to avoid context bloat
697
698
  delete rawResponse.croppedImage;
698
699
  }
700
+ else if (!found) {
701
+ // Element not found and no cropped image - capture a fresh screenshot
702
+ // so the user can see what's currently visible on screen
703
+ try {
704
+ const screenshotBase64 = await sdk.agent.system.captureScreenBase64(1, false, true);
705
+ if (screenshotBase64) {
706
+ screenshotResourceUri = storeImage(screenshotBase64, "screenshot");
707
+ logger.debug("find: Captured screenshot for not-found state");
708
+ }
709
+ }
710
+ catch (e) {
711
+ logger.warn("find: Failed to capture screenshot for not-found state", { error: String(e) });
712
+ }
713
+ }
699
714
  // Remove extractedText and pixelDiffImage from response to reduce context bloat
700
715
  delete rawResponse.extractedText;
701
716
  delete rawResponse.pixelDiffImage;
@@ -717,6 +732,7 @@ registerAppTool(server, "find", {
717
732
  element: elementInfo,
718
733
  ref: elementRef,
719
734
  croppedImageResourceUri,
735
+ screenshotResourceUri,
720
736
  duration,
721
737
  }, generatedCode);
722
738
  }
@@ -786,6 +802,7 @@ registerAppTool(server, "findall", {
786
802
  const duration = Date.now() - startTime;
787
803
  // Store cropped image for resource serving (instead of inline data URL)
788
804
  let croppedImageResourceUri;
805
+ let screenshotResourceUri;
789
806
  const croppedImage = rawResponse.croppedImage;
790
807
  if (croppedImage) {
791
808
  const imageData = croppedImage.startsWith('data:')
@@ -795,6 +812,20 @@ registerAppTool(server, "findall", {
795
812
  // Remove croppedImage from response to avoid context bloat
796
813
  delete rawResponse.croppedImage;
797
814
  }
815
+ else if (count === 0) {
816
+ // No elements found and no cropped image - capture a fresh screenshot
817
+ // so the user can see what's currently visible on screen
818
+ try {
819
+ const screenshotBase64 = await sdk.agent.system.captureScreenBase64(1, false, true);
820
+ if (screenshotBase64) {
821
+ screenshotResourceUri = storeImage(screenshotBase64, "screenshot");
822
+ logger.debug("findall: Captured screenshot for not-found state");
823
+ }
824
+ }
825
+ catch (e) {
826
+ logger.warn("findall: Failed to capture screenshot for not-found state", { error: String(e) });
827
+ }
828
+ }
798
829
  // Remove extractedText and pixelDiffImage from response to reduce context bloat
799
830
  delete rawResponse.extractedText;
800
831
  delete rawResponse.pixelDiffImage;
@@ -810,6 +841,7 @@ registerAppTool(server, "findall", {
810
841
  refs,
811
842
  elements: elementInfos,
812
843
  croppedImageResourceUri,
844
+ screenshotResourceUri,
813
845
  duration,
814
846
  }, generatedCode);
815
847
  }
@@ -1029,6 +1061,7 @@ registerAppTool(server, "find_and_click", {
1029
1061
  const duration = Date.now() - startTime;
1030
1062
  // Store cropped image (screenshot) for resource serving
1031
1063
  let croppedImageResourceUri;
1064
+ let screenshotResourceUri;
1032
1065
  const croppedImage = rawResponse.croppedImage;
1033
1066
  if (croppedImage) {
1034
1067
  const imageData = croppedImage.startsWith('data:')
@@ -1037,6 +1070,20 @@ registerAppTool(server, "find_and_click", {
1037
1070
  croppedImageResourceUri = storeImage(imageData, "screenshot");
1038
1071
  delete rawResponse.croppedImage;
1039
1072
  }
1073
+ else {
1074
+ // No cropped image - capture a fresh screenshot so the user can see
1075
+ // what's currently visible on screen when element was not found
1076
+ try {
1077
+ const screenshotBase64 = await sdk.agent.system.captureScreenBase64(1, false, true);
1078
+ if (screenshotBase64) {
1079
+ screenshotResourceUri = storeImage(screenshotBase64, "screenshot");
1080
+ logger.debug("find_and_click: Captured screenshot for not-found state");
1081
+ }
1082
+ }
1083
+ catch (e) {
1084
+ logger.warn("find_and_click: Failed to capture screenshot for not-found state", { error: String(e) });
1085
+ }
1086
+ }
1040
1087
  // Remove extractedText and pixelDiffImage from response to reduce context bloat
1041
1088
  delete rawResponse.extractedText;
1042
1089
  delete rawResponse.pixelDiffImage;
@@ -1045,6 +1092,7 @@ registerAppTool(server, "find_and_click", {
1045
1092
  action: "find_and_click",
1046
1093
  error: "Element not found",
1047
1094
  croppedImageResourceUri,
1095
+ screenshotResourceUri,
1048
1096
  duration
1049
1097
  });
1050
1098
  }
@@ -1372,9 +1420,9 @@ You can optionally provide a reference image URI to compare against a previous s
1372
1420
  });
1373
1421
  // Exec
1374
1422
  server.registerTool("exec", {
1375
- description: "Execute code in the sandbox (JavaScript, shell, or PowerShell)",
1423
+ description: "Execute shell or PowerShell commands in the sandbox",
1376
1424
  inputSchema: z.object({
1377
- language: z.enum(["js", "sh", "pwsh"]).default("js"),
1425
+ language: z.enum(["sh", "pwsh"]).default("sh"),
1378
1426
  code: z.string().describe("Code to execute"),
1379
1427
  timeout: z.number().default(30000).describe("Timeout in ms"),
1380
1428
  }),
@@ -161,7 +161,7 @@ export function generateActionCode(
161
161
  }
162
162
 
163
163
  case "exec": {
164
- const language = (args.language as string) || "js";
164
+ const language = (args.language as string) || "sh";
165
165
  const code = args.code as string;
166
166
  const timeout = args.timeout as number | undefined;
167
167