testdriverai 7.3.12 → 7.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. package/.github/skills/testdriver:ai/SKILL.md +204 -0
  2. package/.github/skills/testdriver:assert/SKILL.md +284 -0
  3. package/.github/skills/testdriver:aws-setup/SKILL.md +515 -0
  4. package/.github/skills/testdriver:caching/SKILL.md +124 -0
  5. package/.github/skills/testdriver:captcha/SKILL.md +159 -0
  6. package/.github/skills/testdriver:ci-cd/SKILL.md +602 -0
  7. package/.github/skills/testdriver:click/SKILL.md +286 -0
  8. package/.github/skills/testdriver:client/SKILL.md +339 -0
  9. package/.github/skills/testdriver:cloud/SKILL.md +119 -0
  10. package/.github/skills/testdriver:customizing-devices/SKILL.md +153 -0
  11. package/.github/skills/testdriver:dashcam/SKILL.md +418 -0
  12. package/.github/skills/testdriver:debugging-with-screenshots/SKILL.md +271 -0
  13. package/.github/skills/testdriver:device-config/SKILL.md +317 -0
  14. package/.github/skills/testdriver:double-click/SKILL.md +102 -0
  15. package/.github/skills/testdriver:elements/SKILL.md +605 -0
  16. package/.github/skills/testdriver:enterprise/SKILL.md +114 -0
  17. package/.github/skills/testdriver:examples/SKILL.md +7 -0
  18. package/.github/skills/testdriver:exec/SKILL.md +345 -0
  19. package/.github/skills/testdriver:find/SKILL.md +721 -0
  20. package/.github/skills/testdriver:focus-application/SKILL.md +293 -0
  21. package/.github/skills/testdriver:generating-tests/SKILL.md +36 -0
  22. package/.github/skills/testdriver:hover/SKILL.md +278 -0
  23. package/.github/skills/testdriver:locating-elements/SKILL.md +71 -0
  24. package/.github/skills/testdriver:making-assertions/SKILL.md +32 -0
  25. package/.github/skills/testdriver:mcp-workflow/SKILL.md +410 -0
  26. package/.github/skills/testdriver:mouse-down/SKILL.md +161 -0
  27. package/.github/skills/testdriver:mouse-up/SKILL.md +164 -0
  28. package/.github/skills/testdriver:performing-actions/SKILL.md +51 -0
  29. package/.github/skills/testdriver:press-keys/SKILL.md +348 -0
  30. package/.github/skills/testdriver:quickstart/SKILL.md +161 -0
  31. package/.github/skills/testdriver:reusable-code/SKILL.md +240 -0
  32. package/.github/skills/testdriver:right-click/SKILL.md +123 -0
  33. package/.github/skills/testdriver:running-tests/SKILL.md +181 -0
  34. package/.github/skills/testdriver:screenshot/SKILL.md +167 -0
  35. package/.github/skills/testdriver:scroll/SKILL.md +299 -0
  36. package/.github/skills/testdriver:secrets/SKILL.md +115 -0
  37. package/.github/skills/testdriver:self-hosted/SKILL.md +65 -0
  38. package/.github/skills/testdriver:test-writer/SKILL.md +451 -0
  39. package/.github/skills/testdriver:testdriver/SKILL.md +523 -0
  40. package/.github/skills/testdriver:testdriver-mechanic/SKILL.md +165 -0
  41. package/.github/skills/testdriver:type/SKILL.md +357 -0
  42. package/.github/skills/testdriver:variables/SKILL.md +111 -0
  43. package/.github/skills/testdriver:waiting-for-elements/SKILL.md +66 -0
  44. package/.github/skills/testdriver:what-is-testdriver/SKILL.md +54 -0
  45. package/.github/workflows/acceptance-windows-scheduled.yaml +6 -1
  46. package/.github/workflows/acceptance.yaml +0 -36
  47. package/.github/workflows/update-examples.yaml +53 -0
  48. package/CHANGELOG.md +4 -0
  49. package/agent/events.js +1 -0
  50. package/agent/index.js +8 -0
  51. package/agent/lib/commands.js +48 -29
  52. package/agent/lib/redraw.js +3 -1
  53. package/agent/lib/sandbox.js +166 -14
  54. package/agent/lib/sdk.js +142 -3
  55. package/agent/lib/system.js +4 -6
  56. package/ai/skills/testdriver:ai/SKILL.md +204 -0
  57. package/ai/skills/testdriver:assert/SKILL.md +315 -0
  58. package/ai/skills/testdriver:aws-setup/SKILL.md +448 -0
  59. package/ai/skills/testdriver:caching/SKILL.md +124 -0
  60. package/ai/skills/testdriver:captcha/SKILL.md +159 -0
  61. package/ai/skills/testdriver:ci-cd/SKILL.md +602 -0
  62. package/ai/skills/testdriver:click/SKILL.md +286 -0
  63. package/ai/skills/testdriver:client/SKILL.md +372 -0
  64. package/ai/skills/testdriver:cloud/SKILL.md +119 -0
  65. package/ai/skills/testdriver:customizing-devices/SKILL.md +153 -0
  66. package/ai/skills/testdriver:dashcam/SKILL.md +418 -0
  67. package/ai/skills/testdriver:debugging-with-screenshots/SKILL.md +401 -0
  68. package/ai/skills/testdriver:device-config/SKILL.md +317 -0
  69. package/ai/skills/testdriver:double-click/SKILL.md +102 -0
  70. package/ai/skills/testdriver:elements/SKILL.md +605 -0
  71. package/ai/skills/testdriver:enterprise/SKILL.md +114 -0
  72. package/ai/skills/testdriver:examples/SKILL.md +7 -0
  73. package/ai/skills/testdriver:exec/SKILL.md +345 -0
  74. package/ai/skills/testdriver:find/SKILL.md +745 -0
  75. package/ai/skills/testdriver:focus-application/SKILL.md +293 -0
  76. package/ai/skills/testdriver:generating-tests/SKILL.md +36 -0
  77. package/ai/skills/testdriver:hover/SKILL.md +278 -0
  78. package/ai/skills/testdriver:locating-elements/SKILL.md +71 -0
  79. package/ai/skills/testdriver:making-assertions/SKILL.md +32 -0
  80. package/ai/skills/testdriver:mcp-workflow/SKILL.md +410 -0
  81. package/ai/skills/testdriver:mouse-down/SKILL.md +161 -0
  82. package/ai/skills/testdriver:mouse-up/SKILL.md +164 -0
  83. package/ai/skills/testdriver:ocr/SKILL.md +235 -0
  84. package/ai/skills/testdriver:performing-actions/SKILL.md +51 -0
  85. package/ai/skills/testdriver:press-keys/SKILL.md +348 -0
  86. package/ai/skills/testdriver:quickstart/SKILL.md +146 -0
  87. package/ai/skills/testdriver:reusable-code/SKILL.md +240 -0
  88. package/ai/skills/testdriver:right-click/SKILL.md +123 -0
  89. package/ai/skills/testdriver:running-tests/SKILL.md +185 -0
  90. package/ai/skills/testdriver:screenshot/SKILL.md +248 -0
  91. package/ai/skills/testdriver:scroll/SKILL.md +335 -0
  92. package/ai/skills/testdriver:secrets/SKILL.md +115 -0
  93. package/ai/skills/testdriver:self-hosted/SKILL.md +65 -0
  94. package/ai/skills/testdriver:test-writer/SKILL.md +451 -0
  95. package/ai/skills/testdriver:testdriver/SKILL.md +631 -0
  96. package/ai/skills/testdriver:testdriver-mechanic/SKILL.md +165 -0
  97. package/ai/skills/testdriver:type/SKILL.md +357 -0
  98. package/ai/skills/testdriver:variables/SKILL.md +111 -0
  99. package/ai/skills/testdriver:waiting-for-elements/SKILL.md +66 -0
  100. package/ai/skills/testdriver:what-is-testdriver/SKILL.md +54 -0
  101. package/debugger/index.html +12 -2
  102. package/docs/v7/examples/scroll-keyboard.mdx +1 -1
  103. package/docs/v7/find.mdx +1 -0
  104. package/examples/config.mjs +1 -1
  105. package/examples/findall-coffee-icons.test.mjs +42 -0
  106. package/examples/flake-diffthreshold-001.test.mjs +9 -0
  107. package/examples/flake-diffthreshold-01.test.mjs +9 -0
  108. package/examples/flake-diffthreshold-05.test.mjs +9 -0
  109. package/examples/{z_flake-noredraw-cache.test.mjs → flake-noredraw-cache.test.mjs} +2 -2
  110. package/examples/{z_flake-noredraw-nocache.test.mjs → flake-noredraw-nocache.test.mjs} +2 -2
  111. package/examples/{z_flake-redraw-cache.test.mjs → flake-redraw-cache.test.mjs} +2 -2
  112. package/examples/{z_flake-redraw-nocache.test.mjs → flake-redraw-nocache.test.mjs} +2 -2
  113. package/examples/flake-rocket-match.test.mjs +30 -0
  114. package/examples/{z_flake-shared.mjs → flake-shared.mjs} +2 -2
  115. package/examples/parse.test.mjs +19 -0
  116. package/examples/scroll-keyboard.test.mjs +1 -1
  117. package/interfaces/cli/lib/base.js +6 -0
  118. package/interfaces/logger.js +51 -13
  119. package/interfaces/vitest-plugin.mjs +137 -0
  120. package/lib/core/index.d.ts +22 -0
  121. package/lib/init-project.js +105 -6
  122. package/lib/vitest/hooks.mjs +2 -5
  123. package/lib/vitest/setup-disable-defender.mjs +52 -0
  124. package/package.json +2 -1
  125. package/sdk-log-formatter.js +90 -0
  126. package/sdk.d.ts +88 -51
  127. package/sdk.js +126 -18
  128. package/setup/aws/disable-defender.sh +42 -0
  129. package/vitest.config.mjs +1 -3
  130. package/examples/z_flake-diffthreshold-001.test.mjs +0 -9
  131. package/examples/z_flake-diffthreshold-01.test.mjs +0 -9
  132. package/examples/z_flake-diffthreshold-05.test.mjs +0 -9
  133. /package/{examples → manual}/captcha-api.test.mjs +0 -0
@@ -0,0 +1,54 @@
1
+ ---
2
+ name: testdriver:what-is-testdriver
3
+ description: Reliably test your most difficult user flows
4
+ ---
5
+ <!-- Generated from what-is-testdriver.mdx. DO NOT EDIT. -->
6
+
7
+ ## The problem with modern testing tools
8
+
9
+ Modern testing tools like Playwright are designed to test a single web application, running in a single browser tab using selectors.
10
+
11
+ However, selectors are often either unreliable or unavailable in complex scenarios, leading to brittle and flaky tests:
12
+
13
+ | Challenge | Problem | Examples |
14
+ |-----------|---------|----------|
15
+ | **Fast moving teams** | Frequently change UI structure, breaking CSS/XPath selectors | Agile teams, startups, vibe-coders |
16
+ | **Dynamic content** | Cannot be targeted with selectors | AI chatbots, PDFs, images, videos |
17
+ | **Software you don't own** | May lack proper accessibility attributes | Other websites, extensions, third-party applications |
18
+ | **Multi-application workflows** | Cannot be tested with web-only tools | Desktop apps, browser extensions, IDEs |
19
+ | **Visual states** | Impossible to verify with code-based selectors | Charts, graphs, videos, images, spelling errors, UI layout |
20
+
21
+ ## The TestDriver Solution
22
+
23
+ TestDriver is a complete testing platform built specifically for handling these scenarios. It consists of a JavaScript SDK, hosted infrastructure, and debugging tools that make it easy to write, run, and maintain tests for your most difficult user flows.
24
+
25
+ ### JavaScript SDK
26
+
27
+ Here is an example of a TestDriver test that installs a production Chrome extension from the Chrome Web Store and verifies that it appears in the extensions menu:
28
+
29
+ ```javascript Installing Loom from the Chrome Web Store
30
+ import { describe, expect, it } from "vitest";
31
+ import { TestDriver } from "testdriverai/vitest/hooks";
32
+
33
+ describe("Chrome Extension Test", () => {
34
+ const testdriver = TestDriver(context);
35
+
36
+ // Launch Chrome with Loom loaded by its Chrome Web Store ID
37
+ await testdriver.provision.chromeExtension({
38
+ extensionId: 'liecbddmkiiihnedobmlmillhodjkdmb'
39
+ });
40
+
41
+ // Click on the extensions button (puzzle piece icon) in Chrome toolbar
42
+ const extensionsButton = await testdriver.find("The puzzle-shaped icon in the Chrome toolbar.");
43
+ await extensionsButton.click();
44
+
45
+ // Look for Loom in the extensions menu
46
+ const loomExtension = await testdriver.find("Loom extension in the extensions dropdown");
47
+ expect(loomExtension.found()).toBeTruthy();
48
+ });
49
+ ```
50
+
51
+
52
+ <Tip>[vitest](https://vitest.dev/) is the preferred test runner for TestDriver.</Tip>
53
+
54
+
@@ -178,7 +178,7 @@
178
178
  #vm-iframe {
179
179
  display: none;
180
180
  border: none;
181
- pointer-events: auto;
181
+ pointer-events: none; /* view-only by default, enabled on user click */
182
182
  position: absolute;
183
183
  top: 0px;
184
184
  left: 0px;
@@ -188,6 +188,10 @@
188
188
  overflow: hidden;
189
189
  }
190
190
 
191
+ #vm-iframe.interactive {
192
+ pointer-events: auto;
193
+ }
194
+
191
195
  /* Loading screen styles */
192
196
  .loading-screen {
193
197
  position: absolute;
@@ -564,6 +568,7 @@
564
568
  const hideInteractionOverlay = () => {
565
569
  interactionOverlay.classList.remove("visible");
566
570
  isInteractionEnabled = true;
571
+ iframe.classList.add("interactive"); // Enable pointer events on iframe
567
572
 
568
573
  // Clear any existing timeout
569
574
  if (interactionTimeout) {
@@ -571,6 +576,11 @@
571
576
  }
572
577
  };
573
578
 
579
+ const disableInteraction = () => {
580
+ isInteractionEnabled = false;
581
+ iframe.classList.remove("interactive"); // Disable pointer events on iframe
582
+ };
583
+
574
584
  // Event listeners for interaction overlay
575
585
  overlay.addEventListener("mouseenter", showInteractionOverlay);
576
586
  overlay.addEventListener("mouseleave", () => {
@@ -754,7 +764,7 @@
754
764
  // Handle window blur/focus for screen locking
755
765
  window.addEventListener("blur", () => {
756
766
  showInteractionOverlay();
757
- isInteractionEnabled = false;
767
+ disableInteraction();
758
768
  });
759
769
  </script>
760
770
  </body>
@@ -38,7 +38,7 @@ describe("Scroll Keyboard Test", () => {
38
38
  // Navigate to https://www.webhamster.com/
39
39
  await testdriver.focusApplication("Google Chrome");
40
40
  const urlBar = await testdriver.find(
41
- "testdriver-sandbox.vercel.app/login, the URL in the omnibox showing the current page",
41
+ "testdriver-sandbox.vercel.app/login, the URL in the omnibox showing the current page", {zoom: true}
42
42
  );
43
43
  await urlBar.click();
44
44
  await testdriver.pressKeys(["ctrl", "a"]);
package/docs/v7/find.mdx CHANGED
@@ -274,6 +274,7 @@ This two-phase approach gives the AI a higher-resolution view of the target area
274
274
  - Selecting from a grid of similar items
275
275
  - Targeting elements in dense UI areas
276
276
  - The default locate is clicking the wrong similar element
277
+ - You get an AI verification rejection like "The crosshair is located in the empty space of the browser's tab bar/title bar area" — this means the initial locate was imprecise and zoom will help the AI pinpoint the correct element
277
278
  </Tip>
278
279
 
279
280
  ```javascript
@@ -1,5 +1,5 @@
1
1
  export const getDefaults = (context) => ({
2
2
  ip: context.ip || process.env.TD_IP,
3
- redraw: false,
3
+ redraw: { enabled: false },
4
4
  preview: 'ide',
5
5
  });
@@ -0,0 +1,42 @@
1
+ /**
2
+ * TestDriver SDK - FindAll Coffee Icons Test
3
+ * Loads a random icon grid and uses findAll() to locate and click all 4 coffee cup icons
4
+ */
5
+
6
+ import { describe, expect, it } from "vitest";
7
+ import { TestDriver } from "../lib/vitest/hooks.mjs";
8
+ import { getDefaults } from "./config.mjs";
9
+
10
+ describe("FindAll Coffee Icons", () => {
11
+ it("should find and click all 4 coffee cup icons", async (context) => {
12
+ const testdriver = TestDriver(context, {
13
+ ...getDefaults(context),
14
+ headless: true,
15
+ });
16
+
17
+ await testdriver.provision.chrome({
18
+ url: "https://v0-random-icon-grid.vercel.app/",
19
+ });
20
+
21
+ // Use findAll to locate all coffee cup icons on the page
22
+ const coffeeIcons = await testdriver.findAll("coffee cup icon, there are exactly 4 on the page");
23
+
24
+ // Log each icon's coordinates
25
+ console.log(`Found ${coffeeIcons.length} coffee icons:`);
26
+ coffeeIcons.forEach((icon, i) => {
27
+ console.log(` Icon ${i + 1}: (${icon.x}, ${icon.y}) center=(${icon.centerX}, ${icon.centerY})`);
28
+ });
29
+
30
+ // Verify we found 3 or 4 coffee icons
31
+ expect(coffeeIcons.length).toBeGreaterThanOrEqual(3);
32
+ expect(coffeeIcons.length).toBeLessThanOrEqual(4);
33
+
34
+ // Click each coffee cup icon
35
+ for (const icon of coffeeIcons) {
36
+ await icon.click();
37
+ }
38
+
39
+ // Verify the selection count is displayed
40
+ await testdriver.assert("the page says 'Selected: 3 / 4' or 'Matched 4 of a kind!'");
41
+ });
42
+ });
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Popup Loading - diffThreshold=0.01, cache=true
3
+ */
4
+ import { popupLoadingTest } from "./flake-shared.mjs";
5
+
6
+ popupLoadingTest("screen=0.01, cache=true", {
7
+ redraw: { enabled: true, thresholds: { screen: 0.01 } },
8
+ cache: true,
9
+ });
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Popup Loading - diffThreshold=0.1, cache=false
3
+ */
4
+ import { popupLoadingTest } from "./flake-shared.mjs";
5
+
6
+ popupLoadingTest("screen=0.1, cache=false", {
7
+ redraw: { enabled: true, thresholds: { screen: 0.1 } },
8
+ cache: false,
9
+ });
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Popup Loading - diffThreshold=0.5, cache=true
3
+ */
4
+ import { popupLoadingTest } from "./flake-shared.mjs";
5
+
6
+ popupLoadingTest("screen=0.5, cache=true", {
7
+ redraw: { enabled: true, thresholds: { screen: 0.5 } },
8
+ cache: true,
9
+ });
@@ -1,9 +1,9 @@
1
1
  /**
2
2
  * Popup Loading - redraw=false, cache=true
3
3
  */
4
- import { popupLoadingTest } from "./z_flake-shared.mjs";
4
+ import { popupLoadingTest } from "./flake-shared.mjs";
5
5
 
6
6
  popupLoadingTest("redraw=false, cache=true", {
7
- redraw: false,
7
+ redraw: { enabled: false },
8
8
  cache: true,
9
9
  });
@@ -1,9 +1,9 @@
1
1
  /**
2
2
  * Popup Loading - redraw=false, cache=false
3
3
  */
4
- import { popupLoadingTest } from "./z_flake-shared.mjs";
4
+ import { popupLoadingTest } from "./flake-shared.mjs";
5
5
 
6
6
  popupLoadingTest("redraw=false, cache=false", {
7
- redraw: false,
7
+ redraw: { enabled: false },
8
8
  cache: false,
9
9
  });
@@ -1,9 +1,9 @@
1
1
  /**
2
2
  * Popup Loading - redraw=true, cache=true
3
3
  */
4
- import { popupLoadingTest } from "./z_flake-shared.mjs";
4
+ import { popupLoadingTest } from "./flake-shared.mjs";
5
5
 
6
6
  popupLoadingTest("redraw=true, cache=true", {
7
- redraw: true,
7
+ redraw: { enabled: true },
8
8
  cache: true,
9
9
  });
@@ -1,9 +1,9 @@
1
1
  /**
2
2
  * Popup Loading - redraw=true, cache=false
3
3
  */
4
- import { popupLoadingTest } from "./z_flake-shared.mjs";
4
+ import { popupLoadingTest } from "./flake-shared.mjs";
5
5
 
6
6
  popupLoadingTest("redraw=true, cache=false", {
7
- redraw: true,
7
+ redraw: { enabled: true },
8
8
  cache: false,
9
9
  });
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Popup Loading - Skip straight to the rocket match (skipToIcons=true)
3
+ */
4
+ import { describe, expect, it } from "vitest";
5
+ import { TestDriver } from "../lib/vitest/hooks.mjs";
6
+
7
+ describe("Rocket Match (skipToIcons)", () => {
8
+ it("should find the rocket in the icon grid", async (context) => {
9
+ const testdriver = TestDriver(context, {
10
+ preview: "ide",
11
+ ip: context.ip || process.env.TD_IP,
12
+ });
13
+
14
+ await testdriver.provision.chrome({
15
+ url: "https://v0-popup-with-loading-bar.vercel.app/?skipToIcons=true",
16
+ });
17
+
18
+ // Wait for the 5x5 grid of images to fully load and click the rocket
19
+ await testdriver.find("The icon of a rocket in the 5x5 grid of images", {
20
+ timeout: 60000,
21
+ zoom: 1,
22
+ }).click();
23
+
24
+ // Assert the success message appears
25
+ const rocketResult = await testdriver.assert(
26
+ "The text 'You found the rocket!' is visible on the page"
27
+ );
28
+ expect(rocketResult).toBeTruthy();
29
+ });
30
+ });
@@ -20,7 +20,7 @@ export function popupLoadingTest(label, options = {}) {
20
20
  await testdriver.screenshot();
21
21
 
22
22
  // Accept the cookie banner to trigger the loading process
23
- let acceptButton = await testdriver.find("Accept All button on the cookie banner", {timeout: 60000});
23
+ let acceptButton = await testdriver.find("Accept All button on the cookie banner", {timeout: 10000});
24
24
 
25
25
  if (await acceptButton.found()) {
26
26
  await acceptButton.click();
@@ -41,7 +41,7 @@ export function popupLoadingTest(label, options = {}) {
41
41
  await testdriver.find("Continue button in the modal").click();
42
42
 
43
43
  // Wait for the 5x5 grid of images to fully load (up to 60s) and click the rocket
44
- await testdriver.find("rocket image in the 5x5 grid", { timeout: 60000, cacheThreshold: -1 }).click();
44
+ await testdriver.find("The icon of a rocket in the 5x5 grid of images", { timeout: 60000, zoom: true }).click();
45
45
 
46
46
  // Assert the success message appears
47
47
  const rocketResult = await testdriver.assert("The text 'You found the rocket!' is visible on the page");
@@ -0,0 +1,19 @@
1
+ /**
2
+ * TestDriver SDK - Parse Test (Vitest)
3
+ * Opens Airbnb and runs the .parse() SDK command to analyze the screen.
4
+ */
5
+
6
+ import { describe, it } from "vitest";
7
+ import { TestDriver } from "../lib/vitest/hooks.mjs";
8
+ import { getDefaults } from "./config.mjs";
9
+
10
+ describe("Parse Test", () => {
11
+ it("should open Airbnb and parse the screen", async (context) => {
12
+ const testdriver = TestDriver(context, { ...getDefaults(context) });
13
+ await testdriver.provision.chrome({ url: "https://www.airbnb.com" });
14
+
15
+ const result = await testdriver.parse();
16
+ console.log(`Found ${result.elements?.length || 0} elements`);
17
+ console.log(JSON.stringify(result, null, 2));
18
+ });
19
+ });
@@ -16,7 +16,7 @@ describe("Scroll Keyboard Test", () => {
16
16
  // Navigate to https://www.webhamster.com/
17
17
  await testdriver.focusApplication("Google Chrome");
18
18
  const urlBar = await testdriver.find(
19
- "testdriver-sandbox.vercel.app/login, the URL in the omnibox showing the current page",
19
+ "testdriver-sandbox.vercel.app/login, the URL in the omnibox showing the current page", {zoom: true}
20
20
  );
21
21
  await urlBar.click();
22
22
  await testdriver.pressKeys(["ctrl", "a"]);
@@ -48,6 +48,8 @@ class BaseCommand extends Command {
48
48
  this.agent.sandbox.send({
49
49
  type: "output",
50
50
  output: Buffer.from(message).toString("base64"),
51
+ }).catch(() => {
52
+ // Silently ignore output send failures to prevent infinite loops
51
53
  });
52
54
  }
53
55
 
@@ -107,6 +109,10 @@ class BaseCommand extends Command {
107
109
  });
108
110
 
109
111
  this.agent.emitter.on("error:*", (message) => {
112
+ // Don't forward sandbox errors back to sandbox - this creates an infinite loop
113
+ // (sandbox error → error:* event → sendToSandbox → output message → sandbox error → ...)
114
+ const event = this.agent.emitter.event;
115
+ if (event === "error:sandbox") return;
110
116
  this.sendToSandbox(message);
111
117
  });
112
118
  });
@@ -11,31 +11,69 @@ class CustomTransport extends Transport {
11
11
  this.level = opts.level || "info";
12
12
  this.logStore = opts.logStore || []; // You could connect to a DB or API here
13
13
  this.sandbox = null;
14
+
15
+ // Batching configuration to reduce websocket traffic
16
+ this.batchQueue = [];
17
+ this.batchTimeout = null;
18
+ this.BATCH_INTERVAL_MS = 100; // Flush every 100ms
19
+ this.MAX_BATCH_SIZE = 20; // Or when batch reaches 20 messages
14
20
  }
15
21
 
16
- log(info, callback) {
17
-
22
+ _flushBatch() {
23
+ if (this.batchQueue.length === 0) return;
24
+
25
+ // Capture and clear the batch atomically to prevent duplicate sends
26
+ const batch = this.batchQueue;
27
+ this.batchQueue = [];
28
+ this.batchTimeout = null;
29
+
18
30
  try {
19
- const { message } = info;
20
-
21
31
  if (!this.sandbox) {
22
32
  this.sandbox = require("../agent/lib/sandbox");
23
33
  }
24
34
 
25
35
  if (this.sandbox && this.sandbox.instanceSocketConnected) {
26
-
27
-
28
- if (typeof message === "object") {
29
- console.log(chalk.cyan("protecting against base64 error"));
30
- console.log(message);
31
- return;
32
- }
33
-
36
+ // Send all batched messages as a single combined output
37
+ const combinedOutput = batch.join('\n');
34
38
  this.sandbox.send({
35
39
  type: "output",
36
- output: Buffer.from(message).toString("base64"),
40
+ output: Buffer.from(combinedOutput).toString("base64"),
41
+ }).catch((e) => {
42
+ // Async send failed: the batch is NOT re-queued here, the error is only logged
43
+ console.error("Error sending log batch:", e);
37
44
  });
38
45
  }
46
+ } catch (e) {
47
+ // Synchronous failure: put the batch back at the front of the queue so a later flush can retry it
48
+ this.batchQueue = batch.concat(this.batchQueue);
49
+ console.error("Error flushing log batch:", e);
50
+ }
51
+ }
52
+
53
+ log(info, callback) {
54
+ try {
55
+ const { message } = info;
56
+
57
+ if (typeof message === "object") {
58
+ console.log(chalk.cyan("protecting against base64 error"));
59
+ console.log(message);
60
+ callback();
61
+ return;
62
+ }
63
+
64
+ // Add to batch queue instead of sending immediately
65
+ this.batchQueue.push(message);
66
+
67
+ // Flush if batch is full
68
+ if (this.batchQueue.length >= this.MAX_BATCH_SIZE) {
69
+ if (this.batchTimeout) {
70
+ clearTimeout(this.batchTimeout);
71
+ }
72
+ this._flushBatch();
73
+ } else if (!this.batchTimeout) {
74
+ // Schedule flush after interval
75
+ this.batchTimeout = setTimeout(() => this._flushBatch(), this.BATCH_INTERVAL_MS);
76
+ }
39
77
  } catch (e) {
40
78
  console.error("Error in CustomTransport log method:", e);
41
79
  }
@@ -2,6 +2,7 @@ import { execSync } from "child_process";
2
2
  import crypto from "crypto";
3
3
  import fs from "fs";
4
4
  import { createRequire } from "module";
5
+ import os from "os";
5
6
  import path from "path";
6
7
  import { postOrUpdateTestResults } from "../lib/github-comment.mjs";
7
8
  import { setTestRunInfo } from "./shared-test-state.mjs";
@@ -9,6 +10,125 @@ import { setTestRunInfo } from "./shared-test-state.mjs";
9
10
  // Use createRequire to import CommonJS modules without esbuild processing
10
11
  const require = createRequire(import.meta.url);
11
12
 
13
+ // Import Sentry for error reporting
14
+ const Sentry = require("@sentry/node");
15
+
16
+ // Track if Sentry has been initialized
17
+ let sentryInitialized = false;
18
+
19
+ /**
20
+ * Initialize Sentry for test failure reporting
21
+ * Uses same configuration as lib/sentry.js for consistency
22
+ */
23
+ function initializeSentry() {
24
+ if (sentryInitialized) return;
25
+
26
+ // Respect telemetry opt-out
27
+ if (process.env.TD_TELEMETRY === "false") {
28
+ return;
29
+ }
30
+
31
+ try {
32
+ const version = resolveTestDriverVersion() || "unknown";
33
+
34
+ Sentry.init({
35
+ dsn:
36
+ process.env.SENTRY_DSN ||
37
+ "https://452bd5a00dbd83a38ee8813e11c57694@o4510262629236736.ingest.us.sentry.io/4510480443637760",
38
+ environment: "vitest",
39
+ release: `testdriverai@${version}`,
40
+ sampleRate: 1.0,
41
+ tracesSampleRate: 1.0,
42
+ enableLogs: true,
43
+ integrations: [Sentry.httpIntegration(), Sentry.nodeContextIntegration()],
44
+ initialScope: {
45
+ tags: {
46
+ platform: os.platform(),
47
+ arch: os.arch(),
48
+ nodeVersion: process.version,
49
+ runner: "vitest",
50
+ },
51
+ },
52
+ // Don't send user-cancelled errors
53
+ beforeSend(event, hint) {
54
+ const error = hint.originalException;
55
+ if (error && error.message && error.message.includes("User cancelled")) {
56
+ return null;
57
+ }
58
+ return event;
59
+ },
60
+ });
61
+
62
+ sentryInitialized = true;
63
+ logger.debug("Sentry initialized for vitest");
64
+ } catch (err) {
65
+ // Sentry init failed - continue without it
66
+ logger.debug("Failed to initialize Sentry:", err.message);
67
+ }
68
+ }
69
+
70
+ /**
71
+ * Capture a test failure in Sentry
72
+ * @param {Object} params - Test failure parameters
73
+ * @param {string} params.testName - Name of the test
74
+ * @param {string} params.testFile - File path of the test
75
+ * @param {string} params.errorMessage - Error message
76
+ * @param {string} [params.errorStack] - Error stack trace
77
+ * @param {string} [params.sessionId] - Session ID if available
78
+ * @param {string} [params.platform] - Platform (windows, mac, linux)
79
+ * @param {number} [params.duration] - Test duration in ms
80
+ */
81
+ function captureTestFailure({ testName, testFile, errorMessage, errorStack, sessionId, platform, duration }) {
82
+ if (!sentryInitialized || process.env.TD_TELEMETRY === "false") return;
83
+
84
+ try {
85
+ // Create an error object with the test failure details
86
+ const error = new Error(errorMessage);
87
+ error.name = "TestFailure";
88
+ if (errorStack) {
89
+ error.stack = errorStack;
90
+ }
91
+
92
+ Sentry.withScope((scope) => {
93
+ scope.setTag("test.name", testName);
94
+ scope.setTag("test.file", testFile);
95
+ scope.setTag("test.status", "failed");
96
+
97
+ if (sessionId) {
98
+ scope.setTag("session", sessionId);
99
+ }
100
+ if (platform) {
101
+ scope.setTag("platform", platform);
102
+ }
103
+
104
+ scope.setContext("test", {
105
+ name: testName,
106
+ file: testFile,
107
+ duration: duration,
108
+ sessionId: sessionId,
109
+ platform: platform,
110
+ });
111
+
112
+ Sentry.captureException(error);
113
+ });
114
+ } catch (err) {
115
+ logger.debug("Failed to capture test failure in Sentry:", err.message);
116
+ }
117
+ }
118
+
119
+ /**
120
+ * Flush Sentry events before process exit
121
+ * @param {number} [timeout=2000] - Timeout in ms
122
+ */
123
+ async function flushSentry(timeout = 2000) {
124
+ if (!sentryInitialized) return;
125
+ try {
126
+ await Sentry.flush(timeout);
127
+ } catch (err) {
128
+ // Ignore flush errors
129
+ }
130
+ }
131
+
12
132
  /**
13
133
  * Resolve the TestDriver SDK version using multiple strategies.
14
134
  * Similar to resolveVitestVersion(), guards against import.meta.url rewriting.
@@ -710,6 +830,9 @@ class TestDriverReporter {
710
830
  this.ctx = ctx;
711
831
  logger.debug("onInit called - UPDATED VERSION");
712
832
 
833
+ // Initialize Sentry for error reporting
834
+ initializeSentry();
835
+
713
836
  // Store project root for making file paths relative
714
837
  pluginState.projectRoot = ctx.config.root || process.cwd();
715
838
  logger.debug("Project root:", pluginState.projectRoot);
@@ -936,6 +1059,9 @@ class TestDriverReporter {
936
1059
  } catch (error) {
937
1060
  logger.error("Failed to complete test run:", error.message);
938
1061
  logger.debug("Error stack:", error.stack);
1062
+ } finally {
1063
+ // Flush any pending Sentry events before process exits
1064
+ await flushSentry();
939
1065
  }
940
1066
  }
941
1067
 
@@ -1028,6 +1154,17 @@ class TestDriverReporter {
1028
1154
  const error = result.errors[0];
1029
1155
  errorMessage = error.message;
1030
1156
  errorStack = error.stack;
1157
+
1158
+ // Report test failure to Sentry
1159
+ captureTestFailure({
1160
+ testName: test.name,
1161
+ testFile,
1162
+ errorMessage,
1163
+ errorStack,
1164
+ sessionId,
1165
+ platform: platform || pluginState.detectedPlatform,
1166
+ duration,
1167
+ });
1031
1168
  }
1032
1169
 
1033
1170
  const suiteName = test.suite?.name;
@@ -134,6 +134,28 @@ export interface TestDriverOptions {
134
134
  analytics?: boolean;
135
135
 
136
136
  /**
137
+ * Cache configuration
138
+ * Set to false to disable caching entirely.
139
+ * Set to an object to configure thresholds.
140
+ * @example { cache: { enabled: true, thresholds: { find: { screen: 0.05, element: 0.8 }, assert: 0.05 } } }
141
+ */
142
+ cache?: boolean | {
143
+ enabled?: boolean;
144
+ thresholds?: {
145
+ /** Thresholds for find operations */
146
+ find?: {
147
+ /** Pixel diff threshold for screen comparison (0-1, default 0.05 = 5% diff allowed) */
148
+ screen?: number;
149
+ /** OpenCV template match threshold for element matching (0-1, default 0.8 = 80% correlation) */
150
+ element?: number;
151
+ };
152
+ /** Pixel diff threshold for assert operations (0-1, default 0.05 = 5% diff allowed) */
153
+ assert?: number;
154
+ };
155
+ };
156
+
157
+ /**
158
+ * @deprecated Use cache.thresholds instead
137
159
  * Cache thresholds for find operations
138
160
  */
139
161
  cacheThresholds?: {