testdriverai 7.3.8 → 7.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/CHANGELOG.md +8 -0
  2. package/agent/lib/commands.js +27 -2
  3. package/agent/lib/sandbox.js +2 -0
  4. package/docs/_data/examples-manifest.json +64 -64
  5. package/docs/v7/assert.mdx +31 -0
  6. package/docs/v7/client.mdx +29 -0
  7. package/docs/v7/examples/ai.mdx +1 -1
  8. package/docs/v7/examples/assert.mdx +1 -1
  9. package/docs/v7/examples/captcha-api.mdx +1 -1
  10. package/docs/v7/examples/chrome-extension.mdx +1 -1
  11. package/docs/v7/examples/drag-and-drop.mdx +1 -1
  12. package/docs/v7/examples/element-not-found.mdx +1 -1
  13. package/docs/v7/examples/hover-image.mdx +1 -1
  14. package/docs/v7/examples/hover-text.mdx +1 -1
  15. package/docs/v7/examples/installer.mdx +1 -1
  16. package/docs/v7/examples/launch-vscode-linux.mdx +1 -1
  17. package/docs/v7/examples/match-image.mdx +1 -1
  18. package/docs/v7/examples/press-keys.mdx +1 -1
  19. package/docs/v7/examples/scroll-keyboard.mdx +1 -1
  20. package/docs/v7/examples/scroll-until-image.mdx +1 -1
  21. package/docs/v7/examples/scroll-until-text.mdx +1 -1
  22. package/docs/v7/examples/scroll.mdx +1 -1
  23. package/docs/v7/examples/type.mdx +1 -1
  24. package/docs/v7/examples/windows-installer.mdx +1 -1
  25. package/docs/v7/find.mdx +24 -0
  26. package/examples/z_flake-shared.mjs +1 -0
  27. package/interfaces/vitest-plugin.d.ts +19 -5
  28. package/interfaces/vitest-plugin.mjs +87 -16
  29. package/lib/vitest/hooks.mjs +36 -7
  30. package/lib/vitest/setup.mjs +10 -8
  31. package/mcp-server/dist/server.mjs +2 -2
  32. package/mcp-server/src/server.ts +2 -2
  33. package/package.json +1 -1
  34. package/sdk.d.ts +19 -2
  35. package/sdk.js +182 -82
  36. package/vitest.config.mjs +1 -0
@@ -9,6 +9,7 @@ export function popupLoadingTest(label, options = {}) {
9
9
  describe(`Popup with Loading (${label})`, () => {
10
10
  it("should accept cookies and wait for completion", async (context) => {
11
11
  const testdriver = TestDriver(context, {
12
+ preview: 'ide',
12
13
  ip: context.ip || process.env.TD_IP,
13
14
  ...options,
14
15
  });
@@ -5,11 +5,20 @@
5
5
 
6
6
  import TestDriverSDK, { TestDriverOptions } from '../sdk';
7
7
 
8
+ /**
9
+ * A single dashcam URL entry for one retry attempt
10
+ */
11
+ export interface DashcamUrlEntry {
12
+ url: string | null;
13
+ platform: string;
14
+ attempt: number;
15
+ }
16
+
8
17
  /**
9
18
  * Plugin state object
10
19
  */
11
20
  export interface PluginState {
12
- dashcamUrls: Map<string, { url: string; platform: string }>;
21
+ dashcamUrls: Map<string, DashcamUrlEntry[]>;
13
22
  suiteTestRuns: Map<string, any>;
14
23
  testDriverOptions: TestDriverOptions;
15
24
  }
@@ -20,14 +29,19 @@ export interface PluginState {
20
29
  export const pluginState: PluginState;
21
30
 
22
31
  /**
23
- * Register a Dashcam URL for a test
32
+ * Register a Dashcam URL for a test attempt
33
+ */
34
+ export function registerDashcamUrl(testId: string, url: string, platform: string, attempt?: number): void;
35
+
36
+ /**
37
+ * Get the latest Dashcam URL entry for a test (backward compatible)
24
38
  */
25
- export function registerDashcamUrl(testId: string, url: string, platform: string): void;
39
+ export function getDashcamUrl(testId: string): DashcamUrlEntry | undefined;
26
40
 
27
41
  /**
28
- * Get Dashcam URL for a test
42
+ * Get all Dashcam URL entries for a test (all retry attempts)
29
43
  */
30
- export function getDashcamUrl(testId: string): { url: string; platform: string } | undefined;
44
+ export function getAllDashcamUrls(testId: string): DashcamUrlEntry[];
31
45
 
32
46
  /**
33
47
  * Clear all Dashcam URLs
@@ -9,6 +9,29 @@ import { setTestRunInfo } from "./shared-test-state.mjs";
9
9
  // Use createRequire to import CommonJS modules without esbuild processing
10
10
  const require = createRequire(import.meta.url);
11
11
 
12
+ /**
13
+ * Resolve the TestDriver SDK version using multiple strategies.
14
+ * Similar to resolveVitestVersion(), guards against import.meta.url rewriting.
15
+ * @returns {string|null}
16
+ */
17
+ function resolveTestDriverVersion() {
18
+ try {
19
+ return require("../package.json").version;
20
+ } catch {}
21
+
22
+ try {
23
+ const cwdRequire = createRequire(path.join(process.cwd(), "package.json"));
24
+ return cwdRequire("testdriverai/package.json").version;
25
+ } catch {}
26
+
27
+ try {
28
+ const pkgPath = path.join(process.cwd(), "node_modules", "testdriverai", "package.json");
29
+ return JSON.parse(fs.readFileSync(pkgPath, "utf8")).version;
30
+ } catch {}
31
+
32
+ return null;
33
+ }
34
+
12
35
  /**
13
36
  * Minimum required Vitest major version
14
37
  */
@@ -170,21 +193,32 @@ export const pluginState = {
170
193
  // TestDriver options to pass to all instances
171
194
  testDriverOptions: {},
172
195
  // Dashcam URL tracking (in-memory, no files needed!)
173
- dashcamUrls: new Map(), // testId -> dashcamUrl
196
+ dashcamUrls: new Map(), // testId -> [{url, platform, attempt}]
174
197
  lastDashcamUrl: null, // Fallback for when test ID isn't available
175
198
  // Suite-level test run tracking
176
199
  suiteTestRuns: new Map(), // suiteId -> { runId, testRunDbId, token }
177
200
  };
178
201
 
179
202
  // Export functions that can be used by the reporter or tests
180
- export function registerDashcamUrl(testId, url, platform) {
181
- logger.debug(`Registering dashcam URL for test ${testId}:`, url);
182
- pluginState.dashcamUrls.set(testId, { url, platform });
203
+ export function registerDashcamUrl(testId, url, platform, attempt) {
204
+ logger.debug(`Registering dashcam URL for test ${testId} (attempt ${attempt || 1}):`, url);
205
+ // Support multiple attempts per test - store as array
206
+ if (!pluginState.dashcamUrls.has(testId)) {
207
+ pluginState.dashcamUrls.set(testId, []);
208
+ }
209
+ pluginState.dashcamUrls.get(testId).push({ url, platform, attempt: attempt || 1 });
183
210
  pluginState.lastDashcamUrl = url;
184
211
  }
185
212
 
186
213
  export function getDashcamUrl(testId) {
187
- return pluginState.dashcamUrls.get(testId);
214
+ const entries = pluginState.dashcamUrls.get(testId);
215
+ if (!entries) return undefined;
216
+ // Return the last entry for backward compatibility (single URL callers)
217
+ return entries[entries.length - 1];
218
+ }
219
+
220
+ export function getAllDashcamUrls(testId) {
221
+ return pluginState.dashcamUrls.get(testId) || [];
188
222
  }
189
223
 
190
224
  export function clearDashcamUrls() {
@@ -743,6 +777,17 @@ class TestDriverReporter {
743
777
  // Default to linux if no tests write platform info
744
778
  testRunData.platform = "linux";
745
779
 
780
+ // Send version metadata
781
+ testRunData.nodeVersion = process.version;
782
+ const tdVer = resolveTestDriverVersion();
783
+ if (tdVer) {
784
+ testRunData.testDriverVersion = tdVer;
785
+ }
786
+ const vitestVer = resolveVitestVersion();
787
+ if (vitestVer) {
788
+ testRunData.vitestVersion = vitestVer;
789
+ }
790
+
746
791
  logger.debug("Creating test run with data:", JSON.stringify(testRunData));
747
792
  pluginState.testRun = await createTestRun(testRunData);
748
793
  logger.debug("Test run created:", JSON.stringify(pluginState.testRun));
@@ -929,6 +974,7 @@ class TestDriverReporter {
929
974
  logger.debug(`Test meta for ${test.id}:`, meta);
930
975
 
931
976
  const dashcamUrl = meta.dashcamUrl || null;
977
+ const dashcamUrls = meta.dashcamUrls || []; // Per-attempt URLs
932
978
  const sessionId = meta.sessionId || null;
933
979
  const platform = meta.platform || null;
934
980
  const sandboxId = meta.sandboxId || null;
@@ -986,8 +1032,12 @@ class TestDriverReporter {
986
1032
 
987
1033
  const suiteName = test.suite?.name;
988
1034
  const startTime = Date.now() - duration; // Calculate start time from duration
1035
+ const retryCount = result.retryCount || 0;
1036
+ const testRunDbId = process.env.TD_TEST_RUN_DB_ID;
1037
+ const consoleUrl = getConsoleUrl(pluginState.apiRoot);
1038
+ const hasRetries = retryCount > 0 && dashcamUrls.length > 1;
989
1039
 
990
- // Record test case with all metadata
1040
+ // Record a single test case with all metadata
991
1041
  const testCaseData = {
992
1042
  runId: testRunId,
993
1043
  testName: test.name,
@@ -997,7 +1047,7 @@ class TestDriverReporter {
997
1047
  startTime: startTime,
998
1048
  endTime: Date.now(),
999
1049
  duration: duration,
1000
- retries: result.retryCount || 0,
1050
+ retries: retryCount,
1001
1051
  };
1002
1052
 
1003
1053
  // Add sessionId if available
@@ -1010,6 +1060,13 @@ class TestDriverReporter {
1010
1060
  testCaseData.replayUrl = dashcamUrl;
1011
1061
  }
1012
1062
 
1063
+ // Include per-attempt replay URLs for retry visibility
1064
+ if (dashcamUrls.length > 0) {
1065
+ const attemptUrls = dashcamUrls
1066
+ .map(a => ({ attempt: a.attempt, url: a.url || null, sessionId: a.sessionId || null }));
1067
+ testCaseData.replayUrls = attemptUrls;
1068
+ }
1069
+
1013
1070
  if (suiteName) testCaseData.suiteName = suiteName;
1014
1071
  if (errorMessage) testCaseData.errorMessage = errorMessage;
1015
1072
  if (errorStack) testCaseData.errorStack = errorStack;
@@ -1025,7 +1082,6 @@ class TestDriverReporter {
1025
1082
  );
1026
1083
 
1027
1084
  const testCaseDbId = testCaseResponse.data?.id;
1028
- const testRunDbId = process.env.TD_TEST_RUN_DB_ID;
1029
1085
 
1030
1086
  // Store test case data for GitHub comment generation
1031
1087
  pluginState.recordedTestCases.push({
@@ -1035,14 +1091,25 @@ class TestDriverReporter {
1035
1091
 
1036
1092
  console.log("");
1037
1093
  console.log(
1038
- `🔗 Test Report: ${getConsoleUrl(pluginState.apiRoot)}/runs/${testRunDbId}/${testCaseDbId}`,
1094
+ `🔗 Test Report: ${consoleUrl}/runs/${testRunDbId}/${testCaseDbId}`,
1039
1095
  );
1096
+
1097
+ // If there were retries, list all per-attempt dashcam URLs for debugging
1098
+ if (hasRetries) {
1099
+ const validAttempts = dashcamUrls.filter(a => a.url);
1100
+ if (validAttempts.length > 0) {
1101
+ console.log(`📋 Retry attempts (${dashcamUrls.length} total):`);
1102
+ for (const attempt of validAttempts) {
1103
+ console.log(` Attempt ${attempt.attempt}: ${attempt.url}`);
1104
+ }
1105
+ }
1106
+ }
1040
1107
 
1041
1108
  // Output parseable format for docs generation (examples only)
1042
1109
  if (testFile.startsWith("examples/")) {
1043
1110
  const testFileName = path.basename(testFile);
1044
1111
  console.log(
1045
- `TESTDRIVER_EXAMPLE_URL::${testFileName}::${getConsoleUrl(pluginState.apiRoot)}/runs/${testRunDbId}/${testCaseDbId}`,
1112
+ `TESTDRIVER_EXAMPLE_URL::${testFileName}::${consoleUrl}/runs/${testRunDbId}/${testCaseDbId}`,
1046
1113
  );
1047
1114
  }
1048
1115
  } catch (error) {
@@ -1099,12 +1166,16 @@ function getPlatform() {
1099
1166
  }
1100
1167
 
1101
1168
  // Try to get platform from dashcam URLs (registered during test cleanup)
1102
- for (const [, data] of pluginState.dashcamUrls) {
1103
- if (data.platform) {
1104
- logger.debug(
1105
- `Using platform from dashcam URL registration: ${data.platform}`,
1106
- );
1107
- return data.platform;
1169
+ for (const [, entries] of pluginState.dashcamUrls) {
1170
+ // entries is now an array of {url, platform, attempt}
1171
+ const arr = Array.isArray(entries) ? entries : [entries];
1172
+ for (const data of arr) {
1173
+ if (data.platform) {
1174
+ logger.debug(
1175
+ `Using platform from dashcam URL registration: ${data.platform}`,
1176
+ );
1177
+ return data.platform;
1178
+ }
1108
1179
  }
1109
1180
  }
1110
1181
 
@@ -415,11 +415,29 @@ export function TestDriver(context, options = {}) {
415
415
  context.task.meta.testOrder = 0;
416
416
  context.task.meta.sessionId = currentInstance.getSessionId?.() || null;
417
417
 
418
+ // Initialize dashcamUrls array for tracking per-attempt URLs (persists across retries)
419
+ if (!context.task.meta.dashcamUrls) {
420
+ context.task.meta.dashcamUrls = [];
421
+ }
422
+
423
+ // Determine the current attempt number (1-based)
424
+ const attemptNumber = context.task.meta.dashcamUrls.length + 1;
425
+ const isRetry = attemptNumber > 1;
426
+ const attemptLabel = isRetry ? ` (attempt ${attemptNumber})` : "";
427
+
418
428
  // Stop dashcam if it was started - with timeout to prevent hanging
419
429
  if (currentInstance._dashcam && currentInstance._dashcam.recording) {
420
430
  try {
421
431
  const dashcamUrl = await currentInstance.dashcam.stop();
422
- // Add dashcam URL to metadata
432
+
433
+ // Track this attempt's URL in the per-attempt array
434
+ context.task.meta.dashcamUrls.push({
435
+ attempt: attemptNumber,
436
+ url: dashcamUrl || null,
437
+ sessionId: currentInstance.getSessionId?.() || null,
438
+ });
439
+
440
+ // Keep backward compatibility - last attempt's URL
423
441
  context.task.meta.dashcamUrl = dashcamUrl || null;
424
442
 
425
443
  // Also register in memory if plugin is available (for cross-process scenarios)
@@ -428,16 +446,15 @@ export function TestDriver(context, options = {}) {
428
446
  context.task.id,
429
447
  dashcamUrl,
430
448
  platform,
449
+ attemptNumber,
431
450
  );
432
451
  }
433
452
 
434
- const debugMode =
435
- process.env.VERBOSE || process.env.DEBUG || process.env.TD_DEBUG;
436
-
437
- if (debugMode) {
453
+ // Always print the dashcam URL for each attempt so it's visible in logs
454
+ if (dashcamUrl) {
438
455
  console.log("");
439
456
  console.log(
440
- "🎥" + chalk.yellow(` Dashcam URL`) + `: ${dashcamUrl}`,
457
+ "🎥" + chalk.yellow(` Dashcam URL${attemptLabel}`) + `: ${dashcamUrl}`,
441
458
  );
442
459
  console.log("");
443
460
  }
@@ -461,11 +478,23 @@ export function TestDriver(context, options = {}) {
461
478
  if (currentInstance._dashcam) {
462
479
  currentInstance._dashcam.recording = false;
463
480
  }
481
+ // Track failed attempt
482
+ context.task.meta.dashcamUrls.push({
483
+ attempt: attemptNumber,
484
+ url: null,
485
+ sessionId: currentInstance.getSessionId?.() || null,
486
+ error: error.message,
487
+ });
464
488
  // Ensure dashcamUrl is set to null if stop failed
465
489
  context.task.meta.dashcamUrl = null;
466
490
  }
467
491
  } else {
468
- // No dashcam recording, set URL to null explicitly
492
+ // No dashcam recording - still track the attempt
493
+ context.task.meta.dashcamUrls.push({
494
+ attempt: attemptNumber,
495
+ url: null,
496
+ sessionId: currentInstance.getSessionId?.() || null,
497
+ });
469
498
  context.task.meta.dashcamUrl = null;
470
499
  }
471
500
 
@@ -15,14 +15,15 @@
15
15
  */
16
16
 
17
17
  import {
18
- clearDashcamUrls,
19
- clearSuiteTestRun,
20
- getDashcamUrl,
21
- getPluginState,
22
- getSuiteTestRun,
23
- pluginState,
24
- registerDashcamUrl,
25
- setSuiteTestRun,
18
+ clearDashcamUrls,
19
+ clearSuiteTestRun,
20
+ getAllDashcamUrls,
21
+ getDashcamUrl,
22
+ getPluginState,
23
+ getSuiteTestRun,
24
+ pluginState,
25
+ registerDashcamUrl,
26
+ setSuiteTestRun,
26
27
  } from '../../interfaces/vitest-plugin.mjs';
27
28
 
28
29
  // Set up global TestDriver plugin interface
@@ -31,6 +32,7 @@ globalThis.__testdriverPlugin = {
31
32
  state: pluginState,
32
33
  registerDashcamUrl,
33
34
  getDashcamUrl,
35
+ getAllDashcamUrls,
34
36
  clearDashcamUrls,
35
37
  getPluginState,
36
38
  getSuiteTestRun,
@@ -401,8 +401,8 @@ Debug mode (connect to existing sandbox):
401
401
  const TestDriverSDK = (await import("../../sdk.js")).default;
402
402
  // Determine preview mode from environment variable
403
403
  // TD_PREVIEW can be "ide", "browser", or "none"
404
- // Default to "none" for MCP server (headless) unless explicitly set
405
- const previewMode = process.env.TD_PREVIEW || "none";
404
+ // Default to "ide" so the live preview shows within the IDE
405
+ const previewMode = process.env.TD_PREVIEW || "ide";
406
406
  logger.debug("session_start: Preview mode", { preview: previewMode });
407
407
  // Get IP from params or environment (for self-hosted instances)
408
408
  const instanceIp = params.ip || process.env.TD_IP;
@@ -509,8 +509,8 @@ Debug mode (connect to existing sandbox):
509
509
 
510
510
  // Determine preview mode from environment variable
511
511
  // TD_PREVIEW can be "ide", "browser", or "none"
512
- // Default to "none" for MCP server (headless) unless explicitly set
513
- const previewMode = process.env.TD_PREVIEW || "none";
512
+ // Default to "ide" so the live preview shows within the IDE
513
+ const previewMode = process.env.TD_PREVIEW || "ide";
514
514
  logger.debug("session_start: Preview mode", { preview: previewMode });
515
515
 
516
516
  // Get IP from params or environment (for self-hosted instances)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "testdriverai",
3
- "version": "7.3.8",
3
+ "version": "7.3.10",
4
4
  "description": "Next generation autonomous AI agent for end-to-end testing of web & desktop",
5
5
  "main": "sdk.js",
6
6
  "types": "sdk.d.ts",
package/sdk.d.ts CHANGED
@@ -230,6 +230,8 @@ export interface TestDriverOptions {
230
230
  logging?: boolean;
231
231
  /** Enable/disable cache (default: true). Set to false to force regeneration on all find operations */
232
232
  cache?: boolean;
233
+ /** Global AI sampling configuration. Can be overridden per find() or assert() call. */
234
+ ai?: AIConfig;
233
235
  /** Cache threshold configuration for different methods */
234
236
  cacheThreshold?: {
235
237
  /** Threshold for find operations (default: 0.05 = 5% difference, 95% similarity) */
@@ -546,6 +548,19 @@ export interface FocusApplicationOptions {
546
548
  name: string;
547
549
  }
548
550
 
551
+ /** AI sampling configuration for controlling model behavior */
552
+ export interface AIConfig {
553
+ /** Temperature for AI sampling (0 = deterministic, higher = more creative). Default: 0 for find verification, model default for assert. */
554
+ temperature?: number;
555
+ /** Top-P and Top-K sampling parameters */
556
+ top?: {
557
+ /** Top-P (nucleus sampling). Controls diversity by limiting to top P probability mass. Range: 0-1. */
558
+ p?: number;
559
+ /** Top-K sampling. Limits choices to top K tokens. 1 = always pick most likely. 0 = disabled. */
560
+ k?: number;
561
+ };
562
+ }
563
+
549
564
  /** Options for extract command */
550
565
  export interface ExtractOptions {
551
566
  /** What to extract */
@@ -564,6 +579,8 @@ export interface AssertOptions {
564
579
  os?: string;
565
580
  /** Screen resolution for cache partitioning */
566
581
  resolution?: string;
582
+ /** AI sampling configuration (overrides global ai config) */
583
+ ai?: AIConfig;
567
584
  }
568
585
 
569
586
  /** Options for exec command */
@@ -1028,7 +1045,7 @@ export default class TestDriverSDK {
1028
1045
  find(description: string, cacheThreshold?: number): ChainableElementPromise;
1029
1046
  find(
1030
1047
  description: string,
1031
- options?: { cacheThreshold?: number; cacheKey?: string; timeout?: number },
1048
+ options?: { cacheThreshold?: number; cacheKey?: string; timeout?: number; ai?: AIConfig },
1032
1049
  ): ChainableElementPromise;
1033
1050
 
1034
1051
  /**
@@ -1267,7 +1284,7 @@ export default class TestDriverSDK {
1267
1284
  * // With custom threshold
1268
1285
  * await client.assert('the page loaded', { threshold: 0.01, cacheKey: 'login-test' });
1269
1286
  */
1270
- assert(assertion: string, options?: { threshold?: number; cacheKey?: string; os?: string; resolution?: string }): Promise<boolean>;
1287
+ assert(assertion: string, options?: { threshold?: number; cacheKey?: string; os?: string; resolution?: string; ai?: AIConfig }): Promise<boolean>;
1271
1288
 
1272
1289
  /**
1273
1290
  * Extract information from the screen using AI