testdriverai 7.3.32 → 7.3.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/.github/copilot-instructions.md +641 -0
  2. package/.github/skills/testdriver:assert/SKILL.md +31 -0
  3. package/.github/skills/testdriver:aws-setup/SKILL.md +1 -68
  4. package/.github/skills/testdriver:client/SKILL.md +33 -0
  5. package/.github/skills/testdriver:debugging-with-screenshots/SKILL.md +175 -45
  6. package/.github/skills/testdriver:find/SKILL.md +87 -0
  7. package/.github/skills/testdriver:parse/SKILL.md +124 -6
  8. package/.github/skills/testdriver:quickstart/SKILL.md +2 -17
  9. package/.github/skills/testdriver:reusable-code/SKILL.md +9 -0
  10. package/.github/skills/testdriver:running-tests/SKILL.md +4 -0
  11. package/.github/skills/testdriver:screenshot/SKILL.md +84 -3
  12. package/.github/skills/testdriver:scroll/SKILL.md +36 -0
  13. package/.github/skills/testdriver:testdriver/SKILL.md +194 -86
  14. package/CHANGELOG.md +4 -0
  15. package/docs/_data/examples-manifest.json +72 -64
  16. package/docs/v7/examples/ai.mdx +1 -1
  17. package/docs/v7/examples/assert.mdx +1 -1
  18. package/docs/v7/examples/chrome-extension.mdx +1 -1
  19. package/docs/v7/examples/drag-and-drop.mdx +1 -1
  20. package/docs/v7/examples/element-not-found.mdx +1 -1
  21. package/docs/v7/examples/hover-image.mdx +1 -1
  22. package/docs/v7/examples/hover-text.mdx +1 -1
  23. package/docs/v7/examples/installer.mdx +1 -1
  24. package/docs/v7/examples/launch-vscode-linux.mdx +1 -1
  25. package/docs/v7/examples/match-image.mdx +1 -1
  26. package/docs/v7/examples/press-keys.mdx +1 -1
  27. package/docs/v7/examples/scroll-keyboard.mdx +1 -1
  28. package/docs/v7/examples/scroll-until-image.mdx +1 -1
  29. package/docs/v7/examples/scroll-until-text.mdx +1 -1
  30. package/docs/v7/examples/scroll.mdx +1 -1
  31. package/docs/v7/examples/type.mdx +1 -1
  32. package/docs/v7/examples/windows-installer.mdx +1 -1
  33. package/package.json +1 -2
  34. package/sdk.js +1 -1
@@ -1,4 +1,3 @@
1
- ```skill
2
1
  ---
3
2
  name: testdriver:parse
4
3
  description: Detect all UI elements on screen using OmniParser
@@ -74,6 +73,10 @@ const result = await testdriver.parse();
74
73
 
75
74
  const clickable = result.elements.filter(e => e.interactivity === 'clickable');
76
75
  console.log(`Found ${clickable.length} clickable elements`);
76
+
77
+ clickable.forEach(el => {
78
+ console.log(`- "${el.content}" at (${el.bbox.x0}, ${el.bbox.y0})`);
79
+ });
77
80
  ```
78
81
 
79
82
  ### Find and Click an Element by Content
@@ -81,13 +84,16 @@ console.log(`Found ${clickable.length} clickable elements`);
81
84
  ```javascript
82
85
  const result = await testdriver.parse();
83
86
 
87
+ // Find a "Submit" button
84
88
  const submitBtn = result.elements.find(e =>
85
89
  e.content.toLowerCase().includes('submit') && e.interactivity === 'clickable'
86
90
  );
87
91
 
88
92
  if (submitBtn) {
93
+ // Calculate center of the bounding box
89
94
  const x = Math.round((submitBtn.bbox.x0 + submitBtn.bbox.x1) / 2);
90
95
  const y = Math.round((submitBtn.bbox.y0 + submitBtn.bbox.y1) / 2);
96
+
91
97
  await testdriver.click({ x, y });
92
98
  }
93
99
  ```
@@ -97,17 +103,130 @@ if (submitBtn) {
97
103
  ```javascript
98
104
  const result = await testdriver.parse();
99
105
 
106
+ // Get all text elements
100
107
  const textElements = result.elements.filter(e => e.type === 'text');
108
+ textElements.forEach(e => console.log(`Text: "${e.content}"`));
109
+
110
+ // Get all icons
101
111
  const icons = result.elements.filter(e => e.type === 'icon');
112
+ console.log(`Found ${icons.length} icons`);
113
+
114
+ // Get all buttons
102
115
  const buttons = result.elements.filter(e => e.type === 'button');
116
+ console.log(`Found ${buttons.length} buttons`);
117
+ ```
118
+
119
+ ### Build Custom Assertions
120
+
121
+ ```javascript
122
+ import { describe, expect, it } from "vitest";
123
+ import { TestDriver } from "testdriverai/lib/vitest/hooks.mjs";
124
+
125
+ describe("Login Page", () => {
126
+ it("should have expected form elements", async (context) => {
127
+ const testdriver = TestDriver(context);
128
+
129
+ await testdriver.provision.chrome({
130
+ url: 'https://myapp.com/login',
131
+ });
132
+
133
+ const result = await testdriver.parse();
134
+
135
+ // Assert expected elements exist
136
+ const textContent = result.elements.map(e => e.content.toLowerCase());
137
+ expect(textContent).toContain('email');
138
+ expect(textContent).toContain('password');
139
+
140
+ // Assert there are clickable elements
141
+ const clickable = result.elements.filter(e => e.interactivity === 'clickable');
142
+ expect(clickable.length).toBeGreaterThan(0);
143
+ });
144
+ });
145
+ ```
146
+
147
+ ### Use Bounding Box Coordinates
148
+
149
+ ```javascript
150
+ const result = await testdriver.parse();
151
+
152
+ result.elements.forEach(el => {
153
+ // Pixel coordinates
154
+ console.log(`Element "${el.content}":`);
155
+ console.log(` bbox: (${el.bbox.x0}, ${el.bbox.y0}) to (${el.bbox.x1}, ${el.bbox.y1})`);
156
+ console.log(` size: ${el.boundingBox.width}x${el.boundingBox.height}`);
157
+ console.log(` position: left=${el.boundingBox.left}, top=${el.boundingBox.top}`);
158
+ });
159
+ ```
160
+
161
+ ### View Annotated Screenshot
162
+
163
+ ```javascript
164
+ const result = await testdriver.parse();
165
+
166
+ // The annotated image shows all detected elements with bounding boxes
167
+ console.log('Annotated screenshot:', result.annotatedImageUrl);
168
+ console.log(`Image dimensions: ${result.imageWidth}x${result.imageHeight}`);
103
169
  ```
104
170
 
171
+ ## How It Works
172
+
173
+ 1. TestDriver captures a screenshot of the current screen
174
+ 2. The image is sent to the TestDriver API
175
+ 3. OmniParser v2 analyzes the image to detect all UI elements
176
+ 4. Each element is classified by type (text, icon, button, etc.) and interactivity
177
+ 5. Bounding boxes are returned as pixel coordinates matching the screen resolution
178
+
179
+ <Note>
180
+ OmniParser detects elements visually — it works with any UI framework, native apps, and even non-standard interfaces. It does not rely on the DOM or accessibility trees.
181
+ </Note>
182
+
105
183
  ## Best Practices
106
184
 
107
- - Use `find()` for targeting specific elements — `parse()` is for full UI analysis
108
- - Filter by `interactivity` to distinguish clickable vs non-interactive elements
109
- - Wait for the page to stabilize before calling `parse()`
110
- - Use the `annotatedImageUrl` for visual debugging
185
+ <AccordionGroup>
186
+ <Accordion title="Use find() for targeting specific elements">
187
+ For locating and interacting with a specific element, prefer `find()`, which uses AI vision. Use `parse()` when you need a complete inventory of all elements on screen.
188
+
189
+ ```javascript
190
+ // Prefer this for clicking a specific element
191
+ await testdriver.find("Submit button").click();
192
+
193
+ // Use parse() for full UI analysis
194
+ const result = await testdriver.parse();
195
+ const allButtons = result.elements.filter(e => e.type === 'button');
196
+ ```
197
+ </Accordion>
198
+
199
+ <Accordion title="Filter by interactivity">
200
+ Use the `interactivity` field to distinguish between clickable and non-interactive elements.
201
+
202
+ ```javascript
203
+ const result = await testdriver.parse();
204
+ const interactive = result.elements.filter(e => e.interactivity === 'clickable');
205
+ const static_ = result.elements.filter(e => e.interactivity === 'non-interactive');
206
+ ```
207
+ </Accordion>
208
+
209
+ <Accordion title="Wait for content to load">
210
+ If elements aren't being detected, the page may not be fully loaded. Add a wait first.
211
+
212
+ ```javascript
213
+ // Wait for page to stabilize
214
+ await testdriver.wait(2000);
215
+
216
+ // Then parse
217
+ const result = await testdriver.parse();
218
+ ```
219
+ </Accordion>
220
+
221
+ <Accordion title="Use the annotated image for debugging">
222
+ The `annotatedImageUrl` provides a visual overlay showing all detected elements with their bounding boxes — great for debugging.
223
+
224
+ ```javascript
225
+ const result = await testdriver.parse();
226
+ console.log('View annotated screenshot:', result.annotatedImageUrl);
227
+ ```
228
+ </Accordion>
229
+ </AccordionGroup>
111
230
 
112
231
  ## Related
113
232
 
@@ -115,4 +234,3 @@ const buttons = result.elements.filter(e => e.type === 'button');
115
234
  - [assert()](/v7/assert) - Make AI-powered assertions about screen state
116
235
  - [screenshot()](/v7/screenshot) - Capture screenshots
117
236
  - [Elements Reference](/v7/elements) - Complete Element API
118
- ```
@@ -14,27 +14,12 @@ TestDriver makes it easy to write automated computer-use tests for web browsers,
14
14
  Get started quickly with the TestDriver CLI.
15
15
 
16
16
  <Steps>
17
- <Step title="Create a TestDriver Account">
18
-
19
- You will need a TestDriver account to get an API key.
20
-
21
- <Card
22
- title="Get an API Key"
23
- icon="user-plus"
24
- href="https://console.testdriver.ai/team"
25
- arrow
26
- horizontal
27
- >
28
- Start with 60 free device minutes, no credit-card required!
29
- </Card>
30
-
31
- </Step>
32
17
  <Step title="Install TestDriver">
33
18
 
34
19
  Use `npx` to quickly set up an example project:
35
20
 
36
21
  ```bash
37
- npx testdriverai@beta init
22
+ npx testdriverai init
38
23
  ```
39
24
 
40
25
  This will walk you through creating a new project folder, installing dependencies, and setting up your API key.
@@ -79,7 +64,7 @@ TestDriver makes it easy to write automated computer-use tests for web browsers,
79
64
  Install Vitest and TestDriver as dev dependencies:
80
65
 
81
66
  ```bash
82
- npm install --save-dev vitest testdriverai@beta
67
+ npm install --save-dev vitest testdriverai
83
68
  ```
84
69
 
85
70
  </Step>
@@ -36,6 +36,15 @@ export async function logout(testdriver) {
36
36
  }
37
37
  ```
38
38
 
39
+ <Warning>
40
+ **Avoid hardcoding dynamic values in element descriptions.** Element selectors should describe the *type* of element, not specific content that might change.
41
+
42
+ **❌ Bad:** `await testdriver.find('profile name TestDriver in the top right')`
43
+ **✅ Good:** `await testdriver.find('user profile name in the top right')`
44
+
45
+ Hardcoded values like usernames, product names, or prices will cause tests to fail when the data changes. Use generic descriptions that work regardless of the specific content displayed.
46
+ </Warning>
47
+
39
48
  Now import and use these helpers in any test:
40
49
 
41
50
  ```javascript test/checkout.test.mjs
@@ -10,6 +10,10 @@ Learn how to run TestDriver tests efficiently with Vitest's powerful test runner
10
10
 
11
11
  TestDriver works with Vitest's powerful test runner.
12
12
 
13
+ <Info>
14
+ Install Vitest globally for best results: `npm install vitest -g`
15
+ </Info>
16
+
13
17
  ### Run All Tests
14
18
 
15
19
  ```bash
@@ -8,6 +8,10 @@ description: Capture and save screenshots during test execution
8
8
 
9
9
  Capture a screenshot of the current screen and automatically save it to a local file. Screenshots are organized by test file for easy debugging and review.
10
10
 
11
+ <Note>
12
+ **Automatic Screenshots (Default: Enabled)**: TestDriver automatically captures screenshots before and after every command (click, type, find, etc.). These are saved with descriptive filenames like `001-click-before-L42-submit-button.png` that include the line number from your test file. You can disable this with `autoScreenshots: false` in your TestDriver options.
13
+ </Note>
14
+
11
15
  ## Syntax
12
16
 
13
17
  ```javascript
@@ -32,12 +36,32 @@ Screenshots are automatically saved to `.testdriver/screenshots/<test-file-name>
32
36
  .testdriver/
33
37
  screenshots/
34
38
  login.test/
35
- screenshot-1737633600000.png
36
- login-page.png
39
+ 001-find-before-L15-email-input.png # Auto: before find()
40
+ 002-find-after-L15-email-input.png # Auto: after find()
41
+ 003-click-before-L16-email-input.png # Auto: before click()
42
+ 004-click-after-L16-email-input.png # Auto: after click()
43
+ 005-type-before-L17-userexamplecom.png # Auto: before type()
44
+ 006-type-after-L17-userexamplecom.png # Auto: after type()
45
+ custom-screenshot.png # Manual: screenshot("custom-screenshot")
37
46
  checkout.test/
38
- screenshot-1737633700000.png
47
+ 001-find-before-L12-checkout-button.png
48
+ ...
39
49
  ```
40
50
 
51
+ ### Automatic Screenshot Naming
52
+
53
+ When `autoScreenshots` is enabled (default), filenames follow this format:
54
+
55
+ `<seq>-<action>-<phase>-L<line>-<description>.png`
56
+
57
+ | Component | Description | Example |
58
+ |-----------|-------------|---------|
59
+ | `seq` | Sequential number (001, 002, ...) | `001` |
60
+ | `action` | Command name | `click`, `type`, `find` |
61
+ | `phase` | Before, after, or error | `before`, `after`, `error` |
62
+ | `L<line>` | Line number from test file | `L42` |
63
+ | `description` | Element description or action target | `submit-button` |
64
+
41
65
  <Note>
42
66
  The screenshot folder for each test file is automatically cleared when the test starts. This ensures you only see screenshots from the most recent test run.
43
67
  </Note>
@@ -107,9 +131,66 @@ describe("Login Flow", () => {
107
131
  });
108
132
  ```
109
133
 
134
+ ## Automatic Screenshots
135
+
136
+ By default, TestDriver captures screenshots **automatically** before and after every command. This creates a complete visual timeline of your test execution without any additional code.
137
+
138
+ ### Enabling/Disabling
139
+
140
+ ```javascript
141
+ // Auto-screenshots enabled by default
142
+ const testdriver = TestDriver(context);
143
+
144
+ // Explicitly disable if needed (not recommended)
145
+ const testdriver = TestDriver(context, {
146
+ autoScreenshots: false
147
+ });
148
+ ```
149
+
150
+ ### What Gets Captured
151
+
152
+ Automatic screenshots are taken around these commands:
153
+ - `find()` / `findAll()`
154
+ - `click()` / `hover()` / `doubleClick()` / `rightClick()`
155
+ - `type()` / `pressKeys()`
156
+ - `scroll()` / `scrollUntilText()` / `scrollUntilImage()`
157
+ - `waitForText()` / `waitForImage()`
158
+ - `focusApplication()`
159
+ - `assert()` / `extract()` / `exec()`
160
+
161
+ ### Example Output
162
+
163
+ For this test code:
164
+
165
+ ```javascript
166
+ // Line 15: Find email input
167
+ const emailInput = await testdriver.find("email input");
168
+ // Line 16: Click it
169
+ await emailInput.click();
170
+ // Line 17: Type email
171
+ await testdriver.type("user@example.com");
172
+ ```
173
+
174
+ TestDriver automatically saves:
175
+
176
+ ```
177
+ 001-find-before-L15-email-input.png
178
+ 002-find-after-L15-email-input.png
179
+ 003-click-before-L16-email-input.png
180
+ 004-click-after-L16-email-input.png
181
+ 005-type-before-L17-userexamplecom.png
182
+ 006-type-after-L17-userexamplecom.png
183
+ ```
184
+
185
+ If an error occurs, the phase will be `error` instead of `after`.
186
+
110
187
  ## Best Practices
111
188
 
112
189
  <AccordionGroup>
190
+ <Accordion title="Let automatic screenshots do the work">
191
+ With `autoScreenshots: true` (default), you get comprehensive coverage without adding manual `screenshot()` calls. Only add manual screenshots for specific named checkpoints.
192
+ </Accordion>
193
+
113
194
  <Accordion title="Use screenshots for debugging flaky tests">
114
195
  When a test fails intermittently, add screenshots at key steps to capture the actual screen state. This helps identify timing issues or unexpected UI states.
115
196
  </Accordion>
@@ -8,6 +8,21 @@ description: Scroll pages and elements
8
8
 
9
9
  Scroll the page or active element in any direction using mouse wheel or keyboard.
10
10
 
11
+ <Warning>
12
+ **Focus Requirements**
13
+
14
+ Scrolling requires the page or a frame to be focused. If an input field or other interactive element has focus, scroll commands may not work as expected. Before scrolling, ensure focus is on the page by:
15
+ - Clicking on a non-interactive area (e.g., page background)
16
+ - Pressing the Escape key to unfocus interactive elements
17
+ - Clicking outside of input fields or text areas
18
+
19
+ **If scroll is still not working**, try using Page Down/Page Up keys directly:
20
+ ```javascript
21
+ await testdriver.pressKeys(['pagedown']); // Scroll down
22
+ await testdriver.pressKeys(['pageup']); // Scroll up
23
+ ```
24
+ </Warning>
25
+
11
26
  ## Syntax
12
27
 
13
28
  ```javascript
@@ -133,6 +148,27 @@ await testdriver.scrollUntilImage('loading spinner', 'down', 5000, 'keyboard', n
133
148
 
134
149
  ## Best Practices
135
150
 
151
+ <Check>
152
+ **Ensure the page has focus before scrolling**
153
+
154
+ ```javascript
155
+ // After typing in an input, unfocus it first
156
+ await testdriver.find('email input').click();
157
+ await testdriver.type('user@example.com');
158
+
159
+ // Click elsewhere or press Escape before scrolling
160
+ await testdriver.pressKeys(['escape']);
161
+ // Or click a non-interactive area
162
+ // await testdriver.find('page background').click();
163
+
164
+ // Now scroll will work properly
165
+ await testdriver.scroll('down', 300);
166
+
167
+ // If scroll still doesn't work, use Page Down directly
168
+ // await testdriver.pressKeys(['pagedown']);
169
+ ```
170
+ </Check>
171
+
136
172
  <Check>
137
173
  **Choose the right scroll method**
138
174