testdriverai 7.3.11 → 7.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. package/.github/skills/testdriver:ai/SKILL.md +204 -0
  2. package/.github/skills/testdriver:assert/SKILL.md +284 -0
  3. package/.github/skills/testdriver:aws-setup/SKILL.md +515 -0
  4. package/.github/skills/testdriver:caching/SKILL.md +124 -0
  5. package/.github/skills/testdriver:captcha/SKILL.md +159 -0
  6. package/.github/skills/testdriver:ci-cd/SKILL.md +602 -0
  7. package/.github/skills/testdriver:click/SKILL.md +286 -0
  8. package/.github/skills/testdriver:client/SKILL.md +339 -0
  9. package/.github/skills/testdriver:cloud/SKILL.md +119 -0
  10. package/.github/skills/testdriver:customizing-devices/SKILL.md +153 -0
  11. package/.github/skills/testdriver:dashcam/SKILL.md +418 -0
  12. package/.github/skills/testdriver:debugging-with-screenshots/SKILL.md +271 -0
  13. package/.github/skills/testdriver:device-config/SKILL.md +317 -0
  14. package/.github/skills/testdriver:double-click/SKILL.md +102 -0
  15. package/.github/skills/testdriver:elements/SKILL.md +605 -0
  16. package/.github/skills/testdriver:enterprise/SKILL.md +114 -0
  17. package/.github/skills/testdriver:examples/SKILL.md +7 -0
  18. package/.github/skills/testdriver:exec/SKILL.md +345 -0
  19. package/.github/skills/testdriver:find/SKILL.md +721 -0
  20. package/.github/skills/testdriver:focus-application/SKILL.md +293 -0
  21. package/.github/skills/testdriver:generating-tests/SKILL.md +36 -0
  22. package/.github/skills/testdriver:hover/SKILL.md +278 -0
  23. package/.github/skills/testdriver:locating-elements/SKILL.md +71 -0
  24. package/.github/skills/testdriver:making-assertions/SKILL.md +32 -0
  25. package/.github/skills/testdriver:mcp-workflow/SKILL.md +410 -0
  26. package/.github/skills/testdriver:mouse-down/SKILL.md +161 -0
  27. package/.github/skills/testdriver:mouse-up/SKILL.md +164 -0
  28. package/.github/skills/testdriver:performing-actions/SKILL.md +51 -0
  29. package/.github/skills/testdriver:press-keys/SKILL.md +348 -0
  30. package/.github/skills/testdriver:quickstart/SKILL.md +161 -0
  31. package/.github/skills/testdriver:reusable-code/SKILL.md +240 -0
  32. package/.github/skills/testdriver:right-click/SKILL.md +123 -0
  33. package/.github/skills/testdriver:running-tests/SKILL.md +181 -0
  34. package/.github/skills/testdriver:screenshot/SKILL.md +167 -0
  35. package/.github/skills/testdriver:scroll/SKILL.md +299 -0
  36. package/.github/skills/testdriver:secrets/SKILL.md +115 -0
  37. package/.github/skills/testdriver:self-hosted/SKILL.md +65 -0
  38. package/.github/skills/testdriver:test-writer/SKILL.md +451 -0
  39. package/.github/skills/testdriver:testdriver/SKILL.md +523 -0
  40. package/.github/skills/testdriver:testdriver-mechanic/SKILL.md +165 -0
  41. package/.github/skills/testdriver:type/SKILL.md +357 -0
  42. package/.github/skills/testdriver:variables/SKILL.md +111 -0
  43. package/.github/skills/testdriver:waiting-for-elements/SKILL.md +66 -0
  44. package/.github/skills/testdriver:what-is-testdriver/SKILL.md +54 -0
  45. package/.github/workflows/acceptance-windows-scheduled.yaml +6 -1
  46. package/.github/workflows/acceptance.yaml +0 -36
  47. package/.github/workflows/update-examples.yaml +53 -0
  48. package/CHANGELOG.md +8 -0
  49. package/agent/events.js +1 -0
  50. package/agent/index.js +8 -0
  51. package/agent/lib/commands.js +48 -29
  52. package/agent/lib/redraw.js +3 -1
  53. package/agent/lib/sandbox.js +166 -14
  54. package/agent/lib/sdk.js +142 -3
  55. package/agent/lib/system.js +4 -6
  56. package/ai/skills/testdriver:ai/SKILL.md +204 -0
  57. package/ai/skills/testdriver:assert/SKILL.md +315 -0
  58. package/ai/skills/testdriver:aws-setup/SKILL.md +448 -0
  59. package/ai/skills/testdriver:caching/SKILL.md +124 -0
  60. package/ai/skills/testdriver:captcha/SKILL.md +159 -0
  61. package/ai/skills/testdriver:ci-cd/SKILL.md +602 -0
  62. package/ai/skills/testdriver:click/SKILL.md +286 -0
  63. package/ai/skills/testdriver:client/SKILL.md +372 -0
  64. package/ai/skills/testdriver:cloud/SKILL.md +119 -0
  65. package/ai/skills/testdriver:customizing-devices/SKILL.md +153 -0
  66. package/ai/skills/testdriver:dashcam/SKILL.md +418 -0
  67. package/ai/skills/testdriver:debugging-with-screenshots/SKILL.md +401 -0
  68. package/ai/skills/testdriver:device-config/SKILL.md +317 -0
  69. package/ai/skills/testdriver:double-click/SKILL.md +102 -0
  70. package/ai/skills/testdriver:elements/SKILL.md +605 -0
  71. package/ai/skills/testdriver:enterprise/SKILL.md +114 -0
  72. package/ai/skills/testdriver:examples/SKILL.md +7 -0
  73. package/ai/skills/testdriver:exec/SKILL.md +345 -0
  74. package/ai/skills/testdriver:find/SKILL.md +745 -0
  75. package/ai/skills/testdriver:focus-application/SKILL.md +293 -0
  76. package/ai/skills/testdriver:generating-tests/SKILL.md +36 -0
  77. package/ai/skills/testdriver:hover/SKILL.md +278 -0
  78. package/ai/skills/testdriver:locating-elements/SKILL.md +71 -0
  79. package/ai/skills/testdriver:making-assertions/SKILL.md +32 -0
  80. package/ai/skills/testdriver:mcp-workflow/SKILL.md +410 -0
  81. package/ai/skills/testdriver:mouse-down/SKILL.md +161 -0
  82. package/ai/skills/testdriver:mouse-up/SKILL.md +164 -0
  83. package/ai/skills/testdriver:ocr/SKILL.md +235 -0
  84. package/ai/skills/testdriver:performing-actions/SKILL.md +51 -0
  85. package/ai/skills/testdriver:press-keys/SKILL.md +348 -0
  86. package/ai/skills/testdriver:quickstart/SKILL.md +146 -0
  87. package/ai/skills/testdriver:reusable-code/SKILL.md +240 -0
  88. package/ai/skills/testdriver:right-click/SKILL.md +123 -0
  89. package/ai/skills/testdriver:running-tests/SKILL.md +185 -0
  90. package/ai/skills/testdriver:screenshot/SKILL.md +248 -0
  91. package/ai/skills/testdriver:scroll/SKILL.md +335 -0
  92. package/ai/skills/testdriver:secrets/SKILL.md +115 -0
  93. package/ai/skills/testdriver:self-hosted/SKILL.md +65 -0
  94. package/ai/skills/testdriver:test-writer/SKILL.md +451 -0
  95. package/ai/skills/testdriver:testdriver/SKILL.md +631 -0
  96. package/ai/skills/testdriver:testdriver-mechanic/SKILL.md +165 -0
  97. package/ai/skills/testdriver:type/SKILL.md +357 -0
  98. package/ai/skills/testdriver:variables/SKILL.md +111 -0
  99. package/ai/skills/testdriver:waiting-for-elements/SKILL.md +66 -0
  100. package/ai/skills/testdriver:what-is-testdriver/SKILL.md +54 -0
  101. package/debugger/index.html +12 -2
  102. package/docs/v7/examples/scroll-keyboard.mdx +1 -1
  103. package/docs/v7/find.mdx +1 -0
  104. package/examples/config.mjs +1 -1
  105. package/examples/findall-coffee-icons.test.mjs +42 -0
  106. package/examples/flake-diffthreshold-001.test.mjs +9 -0
  107. package/examples/flake-diffthreshold-01.test.mjs +9 -0
  108. package/examples/flake-diffthreshold-05.test.mjs +9 -0
  109. package/examples/{z_flake-noredraw-cache.test.mjs → flake-noredraw-cache.test.mjs} +2 -2
  110. package/examples/{z_flake-noredraw-nocache.test.mjs → flake-noredraw-nocache.test.mjs} +2 -2
  111. package/examples/{z_flake-redraw-cache.test.mjs → flake-redraw-cache.test.mjs} +2 -2
  112. package/examples/{z_flake-redraw-nocache.test.mjs → flake-redraw-nocache.test.mjs} +2 -2
  113. package/examples/flake-rocket-match.test.mjs +30 -0
  114. package/examples/{z_flake-shared.mjs → flake-shared.mjs} +2 -2
  115. package/examples/parse.test.mjs +19 -0
  116. package/examples/scroll-keyboard.test.mjs +1 -1
  117. package/interfaces/cli/lib/base.js +6 -0
  118. package/interfaces/logger.js +51 -13
  119. package/interfaces/vitest-plugin.mjs +137 -0
  120. package/lib/core/index.d.ts +22 -0
  121. package/lib/init-project.js +105 -6
  122. package/lib/vitest/hooks.mjs +2 -5
  123. package/lib/vitest/setup-disable-defender.mjs +52 -0
  124. package/package.json +2 -1
  125. package/sdk-log-formatter.js +90 -0
  126. package/sdk.d.ts +88 -51
  127. package/sdk.js +128 -21
  128. package/setup/aws/disable-defender.sh +42 -0
  129. package/vitest.config.mjs +1 -3
  130. package/examples/z_flake-diffthreshold-001.test.mjs +0 -9
  131. package/examples/z_flake-diffthreshold-01.test.mjs +0 -9
  132. package/examples/z_flake-diffthreshold-05.test.mjs +0 -9
  133. package/{examples → manual}/captcha-api.test.mjs +0 -0
@@ -1,6 +1,96 @@
1
1
  const fs = require("fs");
2
2
  const path = require("path");
3
- const { execSync } = require("child_process");
3
+ const { execSync, spawn } = require("child_process");
4
+
5
+ /**
6
+ * Run an npm install command with an animated progress bar
7
+ * @param {string} cmd - The command to run (e.g. "npm")
8
+ * @param {string[]} args - Command arguments
9
+ * @param {string} cwd - Working directory
10
+ * @param {string} label - Label to show (e.g. "vitest testdriverai")
11
+ * @returns {Promise<void>}
12
+ */
13
+ function runInstall(cmd, args, cwd, label) {
14
+ return new Promise((resolve, reject) => {
15
+ const child = spawn(cmd, args, {
16
+ cwd,
17
+ stdio: ["ignore", "pipe", "pipe"],
18
+ shell: process.platform === "win32",
19
+ });
20
+
21
+ const spinnerFrames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
22
+ const barWidth = 20;
23
+ let frame = 0;
24
+ let status = "resolving";
25
+ let filled = 0;
26
+
27
+ // Parse npm stderr for progress hints
28
+ const handleData = (data) => {
29
+ const text = data.toString();
30
+ if (text.includes("idealTree")) {
31
+ status = "resolving packages";
32
+ filled = Math.max(filled, 3);
33
+ } else if (text.includes("reify:")) {
34
+ status = "installing";
35
+ filled = Math.max(filled, 8);
36
+ // Try to extract package name from reify output
37
+ const match = text.match(/reify:([^\s:]+)/);
38
+ if (match) {
39
+ status = `installing ${match[1]}`;
40
+ }
41
+ } else if (text.includes("timing")) {
42
+ filled = Math.max(filled, 14);
43
+ status = "finalizing";
44
+ } else if (text.includes("added")) {
45
+ filled = barWidth;
46
+ status = "done";
47
+ }
48
+ // Slowly increment to show activity
49
+ if (filled < barWidth - 2) {
50
+ filled = Math.min(filled + 1, barWidth - 2);
51
+ }
52
+ };
53
+
54
+ child.stdout.on("data", handleData);
55
+ child.stderr.on("data", handleData);
56
+
57
+ const isTTY = process.stderr.isTTY;
58
+
59
+ const interval = setInterval(() => {
60
+ frame = (frame + 1) % spinnerFrames.length;
61
+ const spinner = spinnerFrames[frame];
62
+ const bar = "█".repeat(filled) + "░".repeat(barWidth - filled);
63
+ const line = ` ${spinner} ${label} [${bar}] ${status}`;
64
+ if (isTTY) {
65
+ process.stderr.clearLine(0);
66
+ process.stderr.cursorTo(0);
67
+ process.stderr.write(line);
68
+ }
69
+ }, 80);
70
+
71
+ child.on("close", (code) => {
72
+ clearInterval(interval);
73
+ if (isTTY) {
74
+ process.stderr.clearLine(0);
75
+ process.stderr.cursorTo(0);
76
+ }
77
+ if (code === 0) {
78
+ resolve();
79
+ } else {
80
+ reject(new Error(`${cmd} ${args.join(" ")} exited with code ${code}`));
81
+ }
82
+ });
83
+
84
+ child.on("error", (err) => {
85
+ clearInterval(interval);
86
+ if (isTTY) {
87
+ process.stderr.clearLine(0);
88
+ process.stderr.cursorTo(0);
89
+ }
90
+ reject(err);
91
+ });
92
+ });
93
+ }
4
94
 
5
95
  /**
6
96
  * Initialize a TestDriver project with all necessary files and configuration
@@ -378,6 +468,16 @@ jobs:
378
468
  if (copiedCount > 0) {
379
469
  progress(`✓ Copied ${copiedCount} agent(s) to .github/agents/`);
380
470
  }
471
+
472
+ // Also set testdriver.md as copilot-instructions.md if it doesn't already exist
473
+ const copilotInstructionsPath = path.join(targetDir, ".github", "copilot-instructions.md");
474
+ const testdriverAgentSource = path.join(agentsSourceDir, "testdriver.md");
475
+ if (!fs.existsSync(copilotInstructionsPath) && fs.existsSync(testdriverAgentSource)) {
476
+ fs.copyFileSync(testdriverAgentSource, copilotInstructionsPath);
477
+ progress("✓ Created .github/copilot-instructions.md");
478
+ } else if (fs.existsSync(copilotInstructionsPath)) {
479
+ progress("⊘ copilot-instructions.md already exists, skipping");
480
+ }
381
481
  } else {
382
482
  progress("⚠ Agents directory not found (will be available after npm install)");
383
483
  }
@@ -409,11 +509,10 @@ jobs:
409
509
  if (!options.skipInstall) {
410
510
  progress("\n📦 Installing dependencies...");
411
511
  try {
412
- execSync("npm install -D vitest testdriverai && npm install dotenv", {
413
- cwd: targetDir,
414
- stdio: "pipe",
415
- });
416
- progress("✓ Dependencies installed successfully");
512
+ await runInstall("npm", ["install", "-D", "vitest", "testdriverai"], targetDir, "vitest testdriverai");
513
+ progress("✓ Installed vitest, testdriverai");
514
+ await runInstall("npm", ["install", "dotenv"], targetDir, "dotenv");
515
+ progress("✓ Installed dotenv");
417
516
  } catch (error) {
418
517
  errors.push("Failed to install dependencies. Run manually:");
419
518
  errors.push(" npm install -D vitest testdriverai");
@@ -324,11 +324,8 @@ export function TestDriver(context, options = {}) {
324
324
  // Add testdriver log to dashcam tracking
325
325
  await testdriver.dashcam.addFileLog(logPath, "TestDriver Log");
326
326
 
327
- // Add web log tracking before starting dashcam
328
- await testdriver.dashcam.addWebLog("**", "Web Logs");
329
-
330
- // Start dashcam recording
331
- await testdriver.dashcam.start();
327
+ // Web log tracking and dashcam.start() are handled by provision.chrome()
328
+ // This ensures addWebLog is called with the domain pattern BEFORE dashcam.start()
332
329
  }
333
330
  })();
334
331
 
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Post-spawn hook to disable Windows Defender
3
+ *
4
+ * Usage in vitest.config.mjs:
5
+ * ```js
6
+ * setupFiles: [
7
+ * 'testdriverai/vitest/setup',
8
+ * 'testdriverai/vitest/setup-aws',
9
+ * 'testdriverai/vitest/setup-disable-defender'
10
+ * ]
11
+ * ```
12
+ */
13
+
14
+ import { execSync } from 'child_process';
15
+ import { dirname, join } from 'path';
16
+ import { fileURLToPath } from 'url';
17
+ import { beforeEach } from 'vitest';
18
+
19
+ const __filename = fileURLToPath(import.meta.url);
20
+ const __dirname = dirname(__filename);
21
+
22
+ beforeEach(async (context) => {
23
+ // Only run if we have an instance IP (self-hosted mode)
24
+ if (!context.ip) return;
25
+
26
+ // Get instance ID from global state set by setup-aws
27
+ const instanceInfo = globalThis.__testdriverAWS?.instances?.get(context.task.id);
28
+ if (!instanceInfo?.instanceId) {
29
+ console.warn('[TestDriver] No instance ID found, skipping Defender disable');
30
+ return;
31
+ }
32
+
33
+ const { instanceId, awsRegion } = instanceInfo;
34
+ const scriptPath = join(__dirname, '../../setup/aws/disable-defender.sh');
35
+
36
+ console.log(`[TestDriver] Disabling Windows Defender on ${instanceId}...`);
37
+
38
+ try {
39
+ execSync(`bash ${scriptPath}`, {
40
+ encoding: 'utf-8',
41
+ env: {
42
+ ...process.env,
43
+ AWS_REGION: awsRegion,
44
+ INSTANCE_ID: instanceId,
45
+ },
46
+ stdio: 'inherit',
47
+ });
48
+ } catch (error) {
49
+ console.warn('[TestDriver] Failed to disable Defender:', error.message);
50
+ // Don't throw - this is optional optimization
51
+ }
52
+ });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "testdriverai",
3
- "version": "7.3.11",
3
+ "version": "7.3.13",
4
4
  "description": "Next generation autonomous AI agent for end-to-end testing of web & desktop",
5
5
  "main": "sdk.js",
6
6
  "types": "sdk.d.ts",
@@ -23,6 +23,7 @@
23
23
  },
24
24
  "./vitest/setup": "./lib/vitest/setup.mjs",
25
25
  "./vitest/setup-aws": "./lib/vitest/setup-aws.mjs",
26
+ "./vitest/setup-disable-defender": "./lib/vitest/setup-disable-defender.mjs",
26
27
  "./vitest/hooks": {
27
28
  "types": "./lib/vitest/hooks.d.ts",
28
29
  "default": "./lib/vitest/hooks.mjs"
@@ -376,6 +376,15 @@ class SDKLogFormatter {
376
376
  }
377
377
  if (meta.cacheHit) {
378
378
  metaParts.push(chalk.bold.yellow("⚡ cached"));
379
+ if (meta.validated) {
380
+ const confStr = meta.validationConfidence !== null && meta.validationConfidence !== undefined
381
+ ? ` ${(meta.validationConfidence * 100).toFixed(1)}%`
382
+ : '';
383
+ metaParts.push(chalk.green(`✅ validated${confStr}`));
384
+ if (meta.coordsUpdated) {
385
+ metaParts.push(chalk.dim.yellow(`↗ coords shifted`));
386
+ }
387
+ }
379
388
  }
380
389
  if (meta.confidence !== undefined && meta.confidence !== null) {
381
390
  metaParts.push(chalk.dim.gray(`confidence: ${meta.confidence}`));
@@ -470,6 +479,46 @@ class SDKLogFormatter {
470
479
  return parts.join(" ");
471
480
  }
472
481
 
482
+ /**
483
+ * Format a single-line findAll message (combines finding + result) 🔎
484
+ * @param {string} description - Element description
485
+ * @param {number} count - Number of elements found
486
+ * @param {Object} meta - Metadata (duration, cache hit)
487
+ * @returns {string} Formatted message
488
+ */
489
+ formatFindAllSingleLine(description, count, meta = {}) {
490
+ const parts = [];
491
+ this.addTimestamp(parts);
492
+ parts.push(this.getPrefix("findAll"));
493
+ parts.push(chalk.bold.magenta("Finding All"));
494
+ parts.push(chalk.cyan(`"${description}"`));
495
+
496
+ const metaParts = [];
497
+
498
+ // Add count with appropriate coloring
499
+ if (count > 0) {
500
+ metaParts.push(chalk.green(`found ${count}`));
501
+ } else {
502
+ metaParts.push(chalk.yellow("found 0"));
503
+ }
504
+
505
+ // Add cache hit indicator
506
+ if (meta.cacheHit) {
507
+ metaParts.push(chalk.bold.yellow("⚡ cached"));
508
+ }
509
+
510
+ // Add duration
511
+ if (meta.duration) {
512
+ metaParts.push(this.formatDurationColored(meta.duration));
513
+ }
514
+
515
+ if (metaParts.length > 0) {
516
+ parts.push(this.joinMetaParts(metaParts));
517
+ }
518
+
519
+ return parts.join(" ");
520
+ }
521
+
473
522
  /**
474
523
  * Format an asserting message (when assertion starts) ✓
475
524
  * @param {string} assertion - What is being asserted
@@ -932,6 +981,47 @@ class SDKLogFormatter {
932
981
 
933
982
  return parts.join(" ");
934
983
  }
984
+
985
+ /**
986
+ * Format act() start message - provides visual scope boundary
987
+ * @param {string} task - The task being executed
988
+ * @returns {string} Formatted act start message
989
+ */
990
+ formatActStart(task) {
991
+ const parts = [];
992
+ this.addTimestamp(parts);
993
+ parts.push(this.getPrefix("action"));
994
+ parts.push(chalk.bold.cyan("Act"));
995
+ parts.push(chalk.cyan(`"${task}"`));
996
+ return parts.join(" ");
997
+ }
998
+
999
+ /**
1000
+ * Format act() completion message - provides visual scope boundary
1001
+ * @param {number} durationMs - Duration in milliseconds
1002
+ * @param {boolean} success - Whether the act completed successfully
1003
+ * @param {string} [error] - Error message if failed
1004
+ * @returns {string} Formatted act complete message
1005
+ */
1006
+ formatActComplete(durationMs, success, error = null) {
1007
+ const parts = [];
1008
+ this.addTimestamp(parts);
1009
+ parts.push(this.getResultPrefix());
1010
+
1011
+ if (success) {
1012
+ parts.push(chalk.green("complete"));
1013
+ } else {
1014
+ parts.push(chalk.red("failed"));
1015
+ if (error) {
1016
+ parts.push(chalk.dim("·"));
1017
+ parts.push(chalk.red(error));
1018
+ }
1019
+ }
1020
+
1021
+ parts.push(this.formatDurationColored(durationMs, "default"));
1022
+
1023
+ return parts.join(" ");
1024
+ }
935
1025
  }
936
1026
 
937
1027
  // Export singleton instance
package/sdk.d.ts CHANGED
@@ -228,11 +228,25 @@ export interface TestDriverOptions {
228
228
  analytics?: boolean;
229
229
  /** Enable console logging output (default: true) */
230
230
  logging?: boolean;
231
- /** Enable/disable cache (default: true). Set to false to force regeneration on all find operations */
232
- cache?: boolean;
233
- /** Global AI sampling configuration. Can be overridden per find() or assert() call. */
231
+ /** Enable/disable cache, or configure with thresholds
232
+ * @example { cache: { enabled: true, thresholds: { find: { screen: 0.05, element: 0.8 }, assert: 0.05 } } }
233
+ */
234
+ cache?: boolean | {
235
+ enabled?: boolean;
236
+ thresholds?: {
237
+ /** Thresholds for find operations */
238
+ find?: {
239
+ /** Pixel diff threshold for screen comparison (0-1, default 0.05 = 5% diff allowed) */
240
+ screen?: number;
241
+ /** OpenCV template match threshold for element matching (0-1, default 0.8 = 80% correlation) */
242
+ element?: number;
243
+ };
244
+ /** Pixel diff threshold for assert operations (0-1, default 0.05 = 5% diff allowed) */
245
+ assert?: number;
246
+ };
247
+ };
234
248
  ai?: AIConfig;
235
- /** Cache threshold configuration for different methods */
249
+ /** @deprecated Use cache.thresholds instead */
236
250
  cacheThreshold?: {
237
251
  /** Threshold for find operations (default: 0.05 = 5% difference, 95% similarity) */
238
252
  find?: number;
@@ -272,20 +286,29 @@ export interface TestDriverOptions {
272
286
  * Example: 001-click-before-L42-submit-button.png
273
287
  */
274
288
  autoScreenshots?: boolean;
275
- /** Redraw configuration for screen change detection */
289
+ /** Redraw configuration for screen change detection
290
+ * @example { redraw: { enabled: true, thresholds: { screen: 0.05, network: true } } }
291
+ */
276
292
  redraw?:
277
293
  | boolean
278
294
  | {
279
295
  /** Enable redraw detection (default: true) */
280
296
  enabled?: boolean;
281
- /** Pixel difference threshold for redraw detection */
297
+ /** Threshold configuration */
298
+ thresholds?: {
299
+ /** Screen diff threshold (0-1). Set to false to disable screen redraw detection. Default: 0.05 */
300
+ screen?: number | false;
301
+ /** Enable/disable network activity monitoring (default: false) */
302
+ network?: boolean;
303
+ };
304
+ /** @deprecated Use thresholds.screen instead */
282
305
  diffThreshold?: number;
283
- /** Enable screen redraw detection */
306
+ /** @deprecated Use thresholds.screen !== false instead */
284
307
  screenRedraw?: boolean;
285
- /** Enable network activity monitoring */
308
+ /** @deprecated Use thresholds.network instead */
286
309
  networkMonitor?: boolean;
287
310
  };
288
- /** @deprecated Use redraw.diffThreshold instead */
311
+ /** @deprecated Use redraw option instead */
289
312
  redrawThreshold?: number | object;
290
313
  /** Additional environment variables */
291
314
  environment?: Record<string, any>;
@@ -366,8 +389,8 @@ export interface HoverResult {
366
389
  [key: string]: any;
367
390
  }
368
391
 
369
- /** Bounding box for an OCR word */
370
- export interface OCRBoundingBox {
392
+ /** Bounding box for a parsed element (pixel coordinates) */
393
+ export interface ParsedElementBBox {
371
394
  /** Left edge X coordinate */
372
395
  x0: number;
373
396
  /** Top edge Y coordinate */
@@ -378,24 +401,36 @@ export interface OCRBoundingBox {
378
401
  y1: number;
379
402
  }
380
403
 
381
- /** Individual word extracted by OCR */
382
- export interface OCRWord {
383
- /** The text content of the word */
404
+ /** Bounding box as {left, top, width, height} */
405
+ export interface ParsedElementBoundingBox {
406
+ left: number;
407
+ top: number;
408
+ width: number;
409
+ height: number;
410
+ }
411
+
412
+ /** Individual element detected by OmniParser */
413
+ export interface ParsedElement {
414
+ /** Element index */
415
+ index: number;
416
+ /** Element type (e.g. "text", "icon", "button") */
417
+ type: string;
418
+ /** Text content or description */
384
419
  content: string;
385
- /** Confidence score for this word (0-100) */
386
- confidence: number;
387
- /** Bounding box coordinates */
388
- bbox: OCRBoundingBox;
420
+ /** Interactivity level (e.g. "clickable", "non-interactive") */
421
+ interactivity: string;
422
+ /** Bounding box in pixel coordinates */
423
+ bbox: ParsedElementBBox;
424
+ /** Bounding box as {left, top, width, height} */
425
+ boundingBox: ParsedElementBoundingBox;
389
426
  }
390
427
 
391
- /** Result from OCR text extraction */
392
- export interface OCRResult {
393
- /** Array of extracted words with positions */
394
- words: OCRWord[];
395
- /** All text concatenated with spaces */
396
- fullText: string;
397
- /** Overall OCR confidence (0-100) */
398
- confidence: number;
428
+ /** Result from OmniParser screen analysis */
429
+ export interface ParseResult {
430
+ /** Array of detected UI elements */
431
+ elements: ParsedElement[];
432
+ /** URL of the annotated screenshot */
433
+ annotatedImageUrl: string;
399
434
  /** Width of the analyzed screenshot */
400
435
  imageWidth: number;
401
436
  /** Height of the analyzed screenshot */
@@ -711,7 +746,7 @@ export class Element {
711
746
  /**
712
747
  * Find the element on screen
713
748
  * @param newDescription - Optional new description to search for
714
- * @param cacheThreshold - Cache threshold for this specific find (overrides global setting)
749
+ * @param options - Cache options: number for threshold, or object with cache.thresholds
715
750
  */
716
751
  find(newDescription?: string, cacheThreshold?: number): Promise<Element>;
717
752
 
@@ -1021,7 +1056,7 @@ export default class TestDriverSDK {
1021
1056
  * Automatically locates the element and returns it
1022
1057
  *
1023
1058
  * @param description - Description of the element to find
1024
- * @param options - Cache threshold (number) or options object
1059
+ * @param options - Cache threshold (number) or options object with cache.thresholds
1025
1060
  * @returns Chainable promise that resolves to Element instance
1026
1061
  *
1027
1062
  * @example
@@ -1034,8 +1069,10 @@ export default class TestDriverSDK {
1034
1069
  * await element.click();
1035
1070
  *
1036
1071
  * @example
1037
- * // Find with custom cache threshold
1038
- * const element = await client.find('login button', 0.01);
1072
+ * // Find with custom cache thresholds
1073
+ * const element = await client.find('login button', {
1074
+ * cache: { thresholds: { screen: 0.05, element: 0.9 } }
1075
+ * });
1039
1076
  *
1040
1077
  * @example
1041
1078
  * // Poll for element with timeout (retries every 5 seconds)
@@ -1045,7 +1082,7 @@ export default class TestDriverSDK {
1045
1082
  find(description: string, cacheThreshold?: number): ChainableElementPromise;
1046
1083
  find(
1047
1084
  description: string,
1048
- options?: { cacheThreshold?: number; cacheKey?: string; timeout?: number; ai?: AIConfig },
1085
+ options?: { cacheThreshold?: number; cacheKey?: string; timeout?: number; ai?: AIConfig; cache?: { thresholds?: { screen?: number; element?: number } } },
1049
1086
  ): ChainableElementPromise;
1050
1087
 
1051
1088
  /**
@@ -1060,9 +1097,13 @@ export default class TestDriverSDK {
1060
1097
  *
1061
1098
  * @example
1062
1099
  * // Find with custom cache threshold
1063
- * const items = await client.findAll('list item', 0.01);
1100
+ * const items = await client.findAll('list item', 0.05);
1064
1101
  */
1065
1102
  findAll(description: string, cacheThreshold?: number): Promise<Element[]>;
1103
+ findAll(
1104
+ description: string,
1105
+ options?: { cacheThreshold?: number; cacheKey?: string; cache?: { thresholds?: { screen?: number } } },
1106
+ ): Promise<Element[]>;
1066
1107
 
1067
1108
  // Text Interaction Methods
1068
1109
 
@@ -1282,7 +1323,7 @@ export default class TestDriverSDK {
1282
1323
  *
1283
1324
  * @example
1284
1325
  * // With custom threshold
1285
- * await client.assert('the page loaded', { threshold: 0.01, cacheKey: 'login-test' });
1326
+ * await client.assert('the page loaded', { threshold: 0.05, cacheKey: 'login-test' });
1286
1327
  */
1287
1328
  assert(assertion: string, options?: { threshold?: number; cacheKey?: string; os?: string; resolution?: string; ai?: AIConfig }): Promise<boolean>;
1288
1329
 
@@ -1343,32 +1384,28 @@ export default class TestDriverSDK {
1343
1384
  screenshot(filename?: string): Promise<string>;
1344
1385
 
1345
1386
  /**
1346
- * Extract all visible text from the current screen using OCR (Tesseract)
1347
- * Returns structured data with text content, bounding boxes, and confidence scores
1387
+ * Parse the current screen using OmniParser v2 to detect all UI elements
1388
+ * Returns structured data with element types, bounding boxes, and content
1389
+ * Requires enterprise or self-hosted plan.
1348
1390
  *
1349
- * @returns OCR extraction result with words, positions, and confidence
1391
+ * @returns Parsed screen elements with positions and types
1350
1392
  *
1351
1393
  * @example
1352
- * // Get all text on screen
1353
- * const result = await testdriver.ocr();
1354
- * console.log(result.fullText);
1394
+ * // Get all elements on screen
1395
+ * const result = await testdriver.parse();
1396
+ * console.log(`Found ${result.elements.length} elements`);
1355
1397
  *
1356
1398
  * @example
1357
- * // Find and click text
1358
- * const result = await testdriver.ocr();
1359
- * const submit = result.words.find(w => w.content === 'Submit');
1360
- * if (submit) {
1361
- * const x = (submit.bbox.x0 + submit.bbox.x1) / 2;
1362
- * const y = (submit.bbox.y0 + submit.bbox.y1) / 2;
1363
- * await testdriver.click({ x, y });
1364
- * }
1399
+ * // Find clickable elements
1400
+ * const result = await testdriver.parse();
1401
+ * const clickable = result.elements.filter(e => e.interactivity === 'clickable');
1365
1402
  *
1366
1403
  * @example
1367
- * // Check if text exists
1368
- * const result = await testdriver.ocr();
1369
- * const hasError = result.words.some(w => w.content.toLowerCase().includes('error'));
1404
+ * // Find text content
1405
+ * const result = await testdriver.parse();
1406
+ * const textElements = result.elements.filter(e => e.type === 'text');
1370
1407
  */
1371
- ocr(): Promise<OCRResult>;
1408
+ parse(): Promise<ParseResult>;
1372
1409
 
1373
1410
  /**
1374
1411
  * Wait for specified time