testdriverai 7.3.34 → 7.3.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,64 @@ Configure TestDriver behavior with options passed to the `TestDriver()` function
10
10
 
11
11
  ```javascript
12
12
  const testdriver = TestDriver(context, {
13
- reconnect: false,
13
+ // === Sandbox & Connection ===
14
+ newSandbox: true, // Force creation of a new sandbox (default: true)
15
+ reconnect: false, // Reconnect to last sandbox (default: false)
16
+ keepAlive: 60000, // Keep sandbox alive after disconnect in ms (default: 60000)
17
+ os: "linux", // 'linux' | 'windows' (default: 'linux')
18
+ resolution: "1366x768", // Sandbox resolution (e.g., '1920x1080')
19
+ ip: "203.0.113.42", // Direct IP for self-hosted sandbox
20
+ sandboxAmi: "ami-1234", // Custom AMI ID (AWS deployments)
21
+ sandboxInstance: "i3.metal", // EC2 instance type (AWS deployments)
22
+
23
+ // === Preview & Debugging ===
24
+ preview: "browser", // "browser" | "ide" | "none" (default: "browser")
25
+ headless: false, // @deprecated - use preview: "none" instead
26
+ debugOnFailure: false, // Keep sandbox alive on test failure for debugging
27
+
28
+ // === Caching ===
29
+ cache: true, // Enable element caching (default: true)
30
+ // Or use advanced caching config:
31
+ // cache: {
32
+ // enabled: true,
33
+ // thresholds: {
34
+ // find: { screen: 0.05, element: 0.8 },
35
+ // assert: 0.05
36
+ // }
37
+ // },
38
+ cacheKey: "my-test", // Cache key for element finding operations
39
+
40
+ // === Recording & Screenshots ===
41
+ dashcam: true, // Enable/disable Dashcam video recording (default: true)
42
+ autoScreenshots: true, // Capture screenshots before/after each command (default: true)
43
+
44
+ // === AI Configuration ===
45
+ ai: { // Global AI sampling configuration
46
+ temperature: 0, // 0 = deterministic, higher = more creative
47
+ top: {
48
+ p: 0.9, // Top-P nucleus sampling (0-1)
49
+ k: 40, // Top-K sampling (1 = most likely, 0 = disabled)
50
+ },
51
+ },
52
+
53
+ // === Screen Change Detection ===
54
+ redraw: true, // Enable redraw detection (default: true)
55
+ // Or use advanced redraw config:
56
+ // redraw: {
57
+ // enabled: true,
58
+ // thresholds: {
59
+ // screen: 0.05, // Pixel diff threshold (0-1), false to disable
60
+ // network: false, // Monitor network activity (default: false)
61
+ // }
62
+ // },
63
+
64
+ // === Logging & Analytics ===
65
+ logging: true, // Enable console logging output (default: true)
66
+ analytics: true, // Enable analytics tracking (default: true)
67
+
68
+ // === Advanced ===
69
+ apiRoot: "https://...", // API endpoint URL (for self-hosted deployments)
70
+ environment: {}, // Additional environment variables for the sandbox
14
71
  });
15
72
  ```
16
73
 
@@ -54,6 +111,16 @@ const testdriver = TestDriver(context, {
54
111
  The legacy `headless: true` option still works for backward compatibility and maps to `preview: "none"`.
55
112
  </Note>
56
113
 
114
+ ### Debug on Failure
115
+
116
+ Keep the sandbox alive when a test fails so you can reconnect and debug interactively. The sandbox ID is printed to the console along with instructions for reconnecting via MCP.
117
+
118
+ ```javascript
119
+ const testdriver = TestDriver(context, {
120
+ debugOnFailure: true,
121
+ });
122
+ ```
123
+
57
124
  ### IP Target
58
125
 
59
126
  If self-hosting TestDriver, use `ip` to specify the device IP. See [Self-Hosting TestDriver](../self-hosting.md) for details.
@@ -105,11 +172,102 @@ steps:
105
172
  - run: TD_OS=${{ matrix.os }} vitest run
106
173
  ```
107
174
 
108
- ## Keepalive
175
+ ### Dashcam Recording
176
+
177
+ Dashcam video recording is enabled by default. Disable it to skip recording:
178
+
179
+ ```javascript
180
+ const testdriver = TestDriver(context, {
181
+ dashcam: false,
182
+ });
183
+ ```
184
+
185
+ ### Automatic Screenshots
186
+
187
+ Screenshots are automatically captured before and after every command (click, type, find, assert, etc.) by default. Each screenshot filename includes the line number from your test file.
188
+
189
+ Disable automatic screenshots:
190
+
191
+ ```javascript
192
+ const testdriver = TestDriver(context, {
193
+ autoScreenshots: false,
194
+ });
195
+ ```
196
+
197
+ ### Caching
198
+
199
+ Element caching speeds up repeated `find()` and `assert()` calls. Enabled by default.
200
+
201
+ ```javascript
202
+ // Disable caching
203
+ const testdriver = TestDriver(context, {
204
+ cache: false,
205
+ });
206
+
207
+ // Advanced: custom thresholds
208
+ const testdriver = TestDriver(context, {
209
+ cache: {
210
+ enabled: true,
211
+ thresholds: {
212
+ find: { screen: 0.05, element: 0.8 },
213
+ assert: 0.05,
214
+ },
215
+ },
216
+ cacheKey: "my-test",
217
+ });
218
+ ```
109
219
 
110
- By default, sandboxes terminate immediately when the test finishes. Set this value to keep the sandbox alive for reconnection.
220
+ ### Redraw Detection
111
221
 
112
- The `keepAlive` param enables you to keep the sandbox running after the test completes for debugging or reconnection. This will allow you to use the debugger to inspect the state of the device after the test has finished.
222
+ Redraw detection waits for the screen to stabilize before taking actions. Enabled by default.
223
+
224
+ ```javascript
225
+ // Disable redraw detection
226
+ const testdriver = TestDriver(context, {
227
+ redraw: false,
228
+ });
229
+
230
+ // Advanced: custom thresholds with network monitoring
231
+ const testdriver = TestDriver(context, {
232
+ redraw: {
233
+ enabled: true,
234
+ thresholds: {
235
+ screen: 0.05,
236
+ network: true,
237
+ },
238
+ },
239
+ });
240
+ ```
241
+
242
+ ### AI Configuration
243
+
244
+ Control how the AI model generates responses for `find()` verification and `assert()` calls:
245
+
246
+ ```javascript
247
+ const testdriver = TestDriver(context, {
248
+ ai: {
249
+ temperature: 0, // 0 = deterministic
250
+ top: { p: 0.9, k: 40 },
251
+ },
252
+ });
253
+ ```
254
+
255
+ ### Environment Variables
256
+
257
+ Pass additional environment variables to the sandbox:
258
+
259
+ ```javascript
260
+ const testdriver = TestDriver(context, {
261
+ environment: {
262
+ MY_VAR: "value",
263
+ DEBUG: "true",
264
+ },
265
+ });
266
+ ```
267
+
268
+ ## Keepalive
269
+
270
+ By default, sandboxes stay alive for 60 seconds after disconnect. Customize this with `keepAlive`:
113
271
 
114
272
  ```javascript
115
273
  const testdriver = TestDriver(context, {
@@ -117,6 +275,14 @@ const testdriver = TestDriver(context, {
117
275
  });
118
276
  ```
119
277
 
278
+ Set to `0` to terminate immediately:
279
+
280
+ ```javascript
281
+ const testdriver = TestDriver(context, {
282
+ keepAlive: 0, // Terminate sandbox immediately on disconnect
283
+ });
284
+ ```
285
+
120
286
  ### Reconnecting to Existing Sandbox
121
287
 
122
288
  Speed up test development by reconnecting to an existing sandbox instead of starting fresh each time. This lets you iterate quickly on failing steps without re-running the entire test from the beginning.
@@ -65,31 +65,31 @@ describe("Chrome Extension Test", () => {
65
65
  expect(popupResult).toBeTruthy();
66
66
  });
67
67
 
68
- it("should load Loom from Chrome Web Store by extensionId", async (context) => {
69
- const testdriver = TestDriver(context, { ...getDefaults(context) });
70
-
71
- // Launch Chrome with Loom loaded by its Chrome Web Store ID
72
- // Loom ID: liecbddmkiiihnedobmlmillhodjkdmb
73
- await testdriver.provision.chromeExtension({
74
- extensionId: 'liecbddmkiiihnedobmlmillhodjkdmb'
75
- });
76
-
77
- // Navigate to testdriver.ai (extensions don't load on New Tab)
78
- const addressBar = await testdriver.find("Chrome address bar");
79
- await addressBar.click();
80
- await testdriver.type("testdriver.ai");
81
- await testdriver.pressKeys(["enter"]);
82
-
83
- // Wait for page to load
84
- const pageResult = await testdriver.assert("I can see testdriver.ai");
85
- expect(pageResult).toBeTruthy();
86
-
87
- // Click on the extensions button (puzzle piece icon) in Chrome toolbar
88
- const extensionsButton = await testdriver.find("The puzzle-shaped icon in the Chrome toolbar.", {zoom: true});
89
- await extensionsButton.click();
90
-
91
- // Look for Loom in the extensions menu
92
- const loomExtension = await testdriver.find("Loom extension in the extensions dropdown");
93
- expect(loomExtension.found()).toBeTruthy();
94
- });
68
+ // it("should load Loom from Chrome Web Store by extensionId", async (context) => {
69
+ // const testdriver = TestDriver(context, { ...getDefaults(context) });
70
+
71
+ // // Launch Chrome with Loom loaded by its Chrome Web Store ID
72
+ // // Loom ID: liecbddmkiiihnedobmlmillhodjkdmb
73
+ // await testdriver.provision.chromeExtension({
74
+ // extensionId: 'liecbddmkiiihnedobmlmillhodjkdmb'
75
+ // });
76
+
77
+ // // Navigate to testdriver.ai (extensions don't load on New Tab)
78
+ // const addressBar = await testdriver.find("Chrome address bar");
79
+ // await addressBar.click();
80
+ // await testdriver.type("testdriver.ai");
81
+ // await testdriver.pressKeys(["enter"]);
82
+
83
+ // // Wait for page to load
84
+ // const pageResult = await testdriver.assert("I can see testdriver.ai");
85
+ // expect(pageResult).toBeTruthy();
86
+
87
+ // // Click on the extensions button (puzzle piece icon) in Chrome toolbar
88
+ // const extensionsButton = await testdriver.find("The puzzle-shaped icon in the Chrome toolbar.", {zoom: true});
89
+ // await extensionsButton.click();
90
+
91
+ // // Look for Loom in the extensions menu
92
+ // const loomExtension = await testdriver.find("Loom extension in the extensions dropdown");
93
+ // expect(loomExtension.found()).toBeTruthy();
94
+ // });
95
95
  });
@@ -1142,7 +1142,10 @@ class TestDriverReporter {
1142
1142
 
1143
1143
  const suiteName = test.suite?.name;
1144
1144
  const startTime = Date.now() - duration; // Calculate start time from duration
1145
- const retryCount = result.retryCount || 0;
1145
+ // In Vitest v4, retryCount is on diagnostic(), not result()
1146
+ // result() only returns { state, errors }, while diagnostic() has retryCount, duration, etc.
1147
+ const diagnostic = test.diagnostic?.();
1148
+ const retryCount = diagnostic?.retryCount || 0;
1146
1149
  const testRunDbId = process.env.TD_TEST_RUN_DB_ID;
1147
1150
  const consoleUrl = getConsoleUrl(pluginState.apiRoot);
1148
1151
  const hasRetries = retryCount > 0 && dashcamUrls.length > 1;
package/lib/sentry.js CHANGED
@@ -16,7 +16,11 @@ const os = require("os");
16
16
  const { version } = require("../package.json");
17
17
  const logger = require("../agent/lib/logger");
18
18
 
19
- // Store the current session's trace context
19
+ // Store trace contexts per session so concurrent tests don't overwrite each other.
20
+ // Keys are sessionIds, values are { traceId, sessionId }.
21
+ const _traceContexts = new Map();
22
+
23
+ // For backward compatibility, track the most recently set session
20
24
  let currentTraceId = null;
21
25
  let currentSessionId = null;
22
26
 
@@ -177,7 +181,13 @@ function setSessionTraceContext(sessionId) {
177
181
  if (!isEnabled() || !sessionId) return;
178
182
 
179
183
  // Derive trace ID from session ID (same algorithm as API)
180
- currentTraceId = crypto.createHash("md5").update(sessionId).digest("hex");
184
+ const traceId = crypto.createHash("md5").update(sessionId).digest("hex");
185
+
186
+ // Store per-session trace context for concurrent safety
187
+ _traceContexts.set(sessionId, { traceId, sessionId });
188
+
189
+ // Also update the module-level "latest" for backward compatibility
190
+ currentTraceId = traceId;
181
191
  currentSessionId = sessionId;
182
192
 
183
193
  // Set as global tag so all events include it
@@ -203,9 +213,26 @@ function setSessionTraceContext(sessionId) {
203
213
  /**
204
214
  * Clear the session trace context
205
215
  */
206
- function clearSessionTraceContext() {
207
- currentTraceId = null;
208
- currentSessionId = null;
216
+ function clearSessionTraceContext(sessionId) {
217
+ if (sessionId) {
218
+ _traceContexts.delete(sessionId);
219
+ // If the cleared session was the "latest", pick another or null
220
+ if (currentSessionId === sessionId) {
221
+ if (_traceContexts.size > 0) {
222
+ const last = Array.from(_traceContexts.values()).pop();
223
+ currentTraceId = last.traceId;
224
+ currentSessionId = last.sessionId;
225
+ } else {
226
+ currentTraceId = null;
227
+ currentSessionId = null;
228
+ }
229
+ }
230
+ } else {
231
+ // Clear all (backward compatibility)
232
+ _traceContexts.clear();
233
+ currentTraceId = null;
234
+ currentSessionId = null;
235
+ }
209
236
  }
210
237
 
211
238
  /**
@@ -131,7 +131,10 @@ function forwardToAllSandboxes(args) {
131
131
  * reporter output).
132
132
  */
133
133
  function installConsoleSpy() {
134
- if (_consoleSpy.installed) return;
134
+ // Check both installed flag AND that spies are still valid.
135
+ // Guards against a race where cleanupConsoleSpy restores mocks (setting
136
+ // installed=false) while a new test is starting up concurrently.
137
+ if (_consoleSpy.installed && _consoleSpy.spies) return;
135
138
  _consoleSpy.installed = true;
136
139
 
137
140
  // Capture originals once — these are whatever console methods look like
@@ -211,33 +214,6 @@ function cleanupConsoleSpy(client) {
211
214
  const testDriverInstances = new WeakMap();
212
215
  const lifecycleHandlers = new WeakMap();
213
216
 
214
- // Set to track all active TestDriver instances for signal-based cleanup
215
- const activeInstances = new Set();
216
-
217
- // Register signal handlers once to clean up all active instances on forced exit
218
- let signalHandlersRegistered = false;
219
- function registerSignalHandlers() {
220
- if (signalHandlersRegistered) return;
221
- signalHandlersRegistered = true;
222
-
223
- const cleanup = async () => {
224
- const instances = Array.from(activeInstances);
225
- activeInstances.clear();
226
- await Promise.race([
227
- Promise.all(instances.map((inst) => inst.disconnect().catch(() => {}))),
228
- new Promise((resolve) => setTimeout(resolve, 5000)), // 5s max for cleanup
229
- ]);
230
- };
231
-
232
- process.on("SIGINT", () => {
233
- cleanup().finally(() => process.exit(130));
234
- });
235
-
236
- process.on("SIGTERM", () => {
237
- cleanup().finally(() => process.exit(143));
238
- });
239
- }
240
-
241
217
  /**
242
218
  * Create a TestDriver client in a Vitest test with automatic lifecycle management
243
219
  *
@@ -315,8 +291,6 @@ export function TestDriver(context, options = {}) {
315
291
  testdriver.__vitestContext = context.task;
316
292
  testdriver._debugOnFailure = mergedOptions.debugOnFailure || false;
317
293
  testDriverInstances.set(context.task, testdriver);
318
- activeInstances.add(testdriver);
319
- registerSignalHandlers();
320
294
 
321
295
  // Set platform metadata early so the reporter can show the correct OS from the start
322
296
  if (!context.task.meta) {
@@ -452,9 +426,6 @@ export function TestDriver(context, options = {}) {
452
426
  // Clean up console spies
453
427
  cleanupConsoleSpy(currentInstance);
454
428
 
455
- // Remove from active instances tracking (even in debug mode we clean up tracking)
456
- activeInstances.delete(currentInstance);
457
-
458
429
  // DO NOT disconnect or terminate - keep sandbox alive for debugging
459
430
  return;
460
431
  }
@@ -581,8 +552,6 @@ export function TestDriver(context, options = {}) {
581
552
  } catch (error) {
582
553
  console.error("Error disconnecting client:", error);
583
554
  } finally {
584
- // Remove from active instances tracking
585
- activeInstances.delete(currentInstance);
586
555
  // Terminate AWS instance if one was spawned for this test
587
556
  // This must happen AFTER dashcam.stop() to ensure recording is saved
588
557
  // AND it must happen even if disconnect() fails
@@ -115,16 +115,16 @@ function cleanupAllInstances() {
115
115
  process.on("exit", cleanupAllInstances);
116
116
  process.on("SIGINT", () => {
117
117
  cleanupAllInstances();
118
- process.exit(130); // Restore default SIGINT exit behavior (128 + signal 2)
118
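+ // Don't call process.exit here. Note: registering this listener removes Node.js's default exit-on-SIGINT behavior, so the process only terminates if another SIGINT listener (presumably the test runner's) exits it.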
119
119
  });
120
120
  process.on("SIGTERM", () => {
121
121
  cleanupAllInstances();
122
- process.exit(143); // Restore default SIGTERM exit behavior (128 + signal 15)
122
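+ // Don't call process.exit here. Note: registering this listener removes Node.js's default exit-on-SIGTERM behavior, so the process only terminates if another SIGTERM listener (presumably the test runner's) exits it.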
123
123
  });
124
124
  process.on("uncaughtException", (error) => {
125
125
  console.error("[TestDriver] Uncaught exception:", error);
126
126
  cleanupAllInstances();
127
- process.exit(1); // Exit after uncaught exception cleanup
127
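+ // Don't call process.exit here. Note: with an 'uncaughtException' listener installed, Node.js no longer terminates the process on its own, so execution continues after this cleanup unless another listener exits.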
128
128
  });
129
129
 
130
130
  beforeEach(async (context) => {
@@ -140,6 +140,14 @@ beforeEach(async (context) => {
140
140
  return;
141
141
  }
142
142
 
143
+ // If ip is provided via plugin options, skip spawning
144
+ const pluginIp = globalThis.__testdriverPlugin?.state?.testDriverOptions?.ip;
145
+ if (pluginIp) {
146
+ console.log(`[TestDriver] Using ip from plugin options: ${pluginIp}`);
147
+ context.ip = pluginIp;
148
+ return;
149
+ }
150
+
143
151
  if (!process.env.AWS_LAUNCH_TEMPLATE_ID || !process.env.AMI_ID) {
144
152
  throw new Error(
145
153
  "[TestDriver] TD_OS=windows requires AWS_LAUNCH_TEMPLATE_ID and AMI_ID environment variables",
@@ -81,6 +81,21 @@ beforeEach(async (context) => {
81
81
  return;
82
82
  }
83
83
 
84
+ // If TD_IP is already set, use it and skip spawning
85
+ if (process.env.TD_IP) {
86
+ console.log(`[TestDriver] Using existing instance at ${process.env.TD_IP}`);
87
+ context.ip = process.env.TD_IP;
88
+ return;
89
+ }
90
+
91
+ // If ip is provided via plugin options, skip spawning
92
+ const pluginIp = globalThis.__testdriverPlugin?.state?.testDriverOptions?.ip;
93
+ if (pluginIp) {
94
+ console.log(`[TestDriver] Using ip from plugin options: ${pluginIp}`);
95
+ context.ip = pluginIp;
96
+ return;
97
+ }
98
+
84
99
  // Verify AWS credentials are available
85
100
  if (!process.env.AWS_ACCESS_KEY_ID || !process.env.AWS_LAUNCH_TEMPLATE_ID || !process.env.AMI_ID) {
86
101
  throw new Error('[TestDriver] TD_OS=windows requires AWS credentials (AWS_ACCESS_KEY_ID, AWS_LAUNCH_TEMPLATE_ID, AMI_ID)');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "testdriverai",
3
- "version": "7.3.34",
3
+ "version": "7.3.36",
4
4
  "description": "Next generation autonomous AI agent for end-to-end testing of web & desktop",
5
5
  "main": "sdk.js",
6
6
  "types": "sdk.d.ts",
@@ -23,7 +23,6 @@
23
23
  },
24
24
  "./vitest/setup": "./lib/vitest/setup.mjs",
25
25
  "./vitest/setup-aws": "./lib/vitest/setup-aws.mjs",
26
- "./vitest/setup-disable-defender": "./lib/vitest/setup-disable-defender.mjs",
27
26
  "./vitest/hooks": {
28
27
  "types": "./lib/vitest/hooks.d.ts",
29
28
  "default": "./lib/vitest/hooks.mjs"