testdriverai 7.2.3 → 7.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,13 @@
1
- name: Publish Beta
1
+ name: Publish
2
2
  permissions:
3
3
  contents: write
4
+ id-token: write # Required for OIDC
4
5
  on:
5
6
  push:
6
7
  branches: [ main ]
7
8
 
8
9
  jobs:
9
- publish-beta:
10
+ publish:
10
11
  runs-on: ubuntu-latest
11
12
 
12
13
  steps:
@@ -29,16 +30,23 @@ jobs:
29
30
  - name: Install dependencies
30
31
  run: npm ci
31
32
 
32
- - name: Bump version (prerelease beta)
33
- run: npm version prerelease --preid=beta --no-git-tag-version
33
+ - name: Bump version (patch)
34
+ run: npm version patch --no-git-tag-version
34
35
 
35
36
  - name: Commit and push version bump
36
37
  run: |
37
38
  git add package.json package-lock.json
38
- git commit -m "chore: bump beta version to $(node -p "require('./package.json').version")"
39
+ git commit -m "chore: bump version to $(node -p "require('./package.json').version")"
39
40
  git push
40
41
 
41
- - name: Publish to npm under beta tag
42
- run: npm publish --tag beta
42
+ - name: Debug NPM Token
43
+ run: |
44
+ echo "NPM_TOKEN is set: ${{ secrets.NPM_TOKEN != '' }}"
45
+ echo "NPM_TOKEN first 4 chars: ${NPM_TOKEN:0:4}..."
46
+ env:
47
+ NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
48
+
49
+ - name: Publish to npm
50
+ run: npm publish --tag beta
43
51
  env:
44
52
  NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
@@ -0,0 +1,36 @@
1
+ name: TestDriver.ai Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [ main, master ]
6
+ pull_request:
7
+ branches: [ main, master ]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - name: Setup Node.js
17
+ uses: actions/setup-node@v4
18
+ with:
19
+ node-version: '20'
20
+ cache: 'npm'
21
+
22
+ - name: Install dependencies
23
+ run: npm ci
24
+
25
+ - name: Run TestDriver.ai tests
26
+ env:
27
+ TD_API_KEY: ${{ secrets.TD_API_KEY }}
28
+ run: npx vitest run
29
+
30
+ - name: Upload test results
31
+ if: always()
32
+ uses: actions/upload-artifact@v4
33
+ with:
34
+ name: test-results
35
+ path: test-results/
36
+ retention-days: 30
package/agent/index.js CHANGED
@@ -17,7 +17,6 @@ const diff = require("diff");
17
17
 
18
18
  // global utilities
19
19
  const generator = require("./lib/generator.js");
20
- const promptCache = require("./lib/cache.js");
21
20
  const theme = require("./lib/theme.js");
22
21
  const SourceMapper = require("./lib/source-mapper.js");
23
22
 
@@ -110,6 +109,10 @@ class TestDriverAgent extends EventEmitter2 {
110
109
  // Create sandbox instance with this agent's emitter, analytics, and session
111
110
  this.sandbox = createSandbox(this.emitter, this.analytics, this.session);
112
111
 
112
+ // Attach Sentry log listeners to capture CLI logs as breadcrumbs
113
+ const sentry = require("../lib/sentry");
114
+ sentry.attachLogListeners(this.emitter);
115
+
113
116
  // Set the OS for the sandbox to use
114
117
  this.sandbox.os = this.sandboxOs;
115
118
 
@@ -191,6 +194,15 @@ class TestDriverAgent extends EventEmitter2 {
191
194
  this.redraw.cleanup();
192
195
  }
193
196
 
197
+ // Close sandbox connection to release the connection slot
198
+ if (this.sandbox) {
199
+ try {
200
+ this.sandbox.close();
201
+ } catch (err) {
202
+ // Ignore sandbox close errors during exit
203
+ }
204
+ }
205
+
194
206
  shouldRunPostrun =
195
207
  !this.hasRunPostrun &&
196
208
  (shouldRunPostrun || this.cliArgs?.command == "run");
@@ -356,7 +368,7 @@ class TestDriverAgent extends EventEmitter2 {
356
368
  image,
357
369
  },
358
370
  (chunk) => {
359
- if (chunk.type === "data") {
371
+ if (chunk.type === "data" && chunk.data) {
360
372
  this.emitter.emit(events.log.markdown.chunk, streamId, chunk.data);
361
373
  }
362
374
  },
@@ -420,9 +432,6 @@ class TestDriverAgent extends EventEmitter2 {
420
432
  let mousePosition = await this.system.getMousePosition();
421
433
  let activeWindow = await this.system.activeWin();
422
434
 
423
- const streamId = `check-${Date.now()}`;
424
- this.emitter.emit(events.log.markdown.start, streamId);
425
-
426
435
  let response = await this.sdk.req(
427
436
  "check",
428
437
  {
@@ -430,15 +439,10 @@ class TestDriverAgent extends EventEmitter2 {
430
439
  images,
431
440
  mousePosition,
432
441
  activeWindow,
433
- },
434
- (chunk) => {
435
- if (chunk.type === "data") {
436
- this.emitter.emit(events.log.markdown.chunk, streamId, chunk.data);
437
- }
438
- },
442
+ }
439
443
  );
440
444
 
441
- this.emitter.emit(events.log.markdown.end, streamId);
445
+ this.emitter.emit(events.log.markdown.static, response.data);
442
446
 
443
447
  this.lastScreenshot = thisScreenshot;
444
448
 
@@ -869,8 +873,7 @@ commands:
869
873
  currentTask,
870
874
  dry = false,
871
875
  validateAndLoop = false,
872
- shouldSave = true,
873
- useCache = true,
876
+ shouldSave = true
874
877
  ) {
875
878
  // Check if execution has been stopped
876
879
  if (this.stopped) {
@@ -889,56 +892,10 @@ commands:
889
892
 
890
893
  this.tasks.push(currentTask);
891
894
 
892
- // Check cache first (if enabled via parameter)
893
- const cachedYaml = useCache ? promptCache.readCache(currentTask) : null;
894
-
895
- if (cachedYaml) {
896
- // Cache hit - load and execute the cached YAML file
897
- this.emitter.emit(
898
- events.log.debug,
899
- `Using cached response for prompt: "${currentTask}"`,
900
- );
901
- this.emitter.emit(events.log.log, theme.dim("(using cached response)"));
902
-
903
- try {
904
- // Load the YAML using hydrateFromYML
905
- const parsed = await generator.hydrateFromYML(
906
- cachedYaml,
907
- this.sessionInstance,
908
- );
909
-
910
- // Execute the commands from the first step
911
- if (parsed.steps && parsed.steps.length > 0) {
912
- const step = parsed.steps[0];
913
- if (step.commands) {
914
- await this.executeCommands(
915
- step.commands,
916
- 0,
917
- false,
918
- dry,
919
- shouldSave,
920
- );
921
- }
922
- }
923
- } catch (err) {
924
- this.emitter.emit(
925
- events.log.debug,
926
- `Error loading cached YAML: ${err.message}, falling back to API`,
927
- );
928
- // Fall through to make API call if cache is invalid
929
- }
930
-
931
- return;
932
- }
933
-
934
- // Cache miss - call the API
935
895
  this.emitter.emit(events.log.narration, theme.dim("thinking..."), true);
936
896
 
937
897
  this.lastScreenshot = await this.system.captureScreenBase64();
938
898
 
939
- const streamId = `input-${Date.now()}`;
940
- this.emitter.emit(events.log.markdown.start, streamId);
941
-
942
899
  let message = await this.sdk.req(
943
900
  "input",
944
901
  {
@@ -946,59 +903,12 @@ commands:
946
903
  mousePosition: await this.system.getMousePosition(),
947
904
  activeWindow: await this.system.activeWin(),
948
905
  image: this.lastScreenshot,
949
- },
950
- (chunk) => {
951
- if (chunk.type === "data") {
952
- this.emitter.emit(events.log.markdown.chunk, streamId, chunk.data);
953
- }
954
- },
906
+ }
955
907
  );
956
908
 
957
- this.emitter.emit(events.log.markdown.end, streamId);
909
+ this.emitter.emit(events.log.log, message.data);
958
910
 
959
911
  if (message && message.data) {
960
- // Save the YAML to cache (if enabled)
961
- if (useCache) {
962
- try {
963
- // Extract YAML code blocks from the markdown response
964
- const codeblocks = await this.parser.findCodeBlocks(message.data);
965
- if (codeblocks && codeblocks.length > 0) {
966
- // Parse commands from all code blocks
967
- const allCommands = [];
968
- for (const block of codeblocks) {
969
- const commands = await this.parser.getCommands(block);
970
- allCommands.push(...commands);
971
- }
972
-
973
- // Create a proper step with prompt
974
- const step = {
975
- prompt: currentTask,
976
- commands: allCommands,
977
- };
978
-
979
- // Use dumpToYML to create a valid testdriver yaml file
980
- const yamlContent = await generator.dumpToYML(
981
- [step],
982
- this.sessionInstance,
983
- );
984
-
985
- const cachePath = promptCache.writeCache(currentTask, yamlContent);
986
- if (cachePath) {
987
- this.emitter.emit(
988
- events.log.debug,
989
- `Cached YAML saved to: ${cachePath}`,
990
- );
991
- }
992
- }
993
- } catch (err) {
994
- // If we can't extract YAML, just skip caching
995
- this.emitter.emit(
996
- events.log.debug,
997
- `Could not cache response: ${err.message}`,
998
- );
999
- }
1000
- }
1001
-
1002
912
  await this.aiExecute(message.data, validateAndLoop, dry, shouldSave);
1003
913
  this.emitter.emit(
1004
914
  events.log.debug,
@@ -2200,6 +2110,15 @@ Please check your network connection, TD_API_KEY, or the service status.`,
2200
2110
  }
2201
2111
 
2202
2112
  this.session.set(sessionRes.data.id);
2113
+
2114
+ // Set Sentry session trace context for distributed tracing
2115
+ // This links CLI errors/logs to the same trace as API calls
2116
+ try {
2117
+ const sentry = require("../lib/sentry");
2118
+ sentry.setSessionTraceContext(sessionRes.data.id);
2119
+ } catch (e) {
2120
+ // Sentry module may not be available, ignore
2121
+ }
2203
2122
  }
2204
2123
 
2205
2124
  // Helper method to find testdriver directory by traversing up from a file path
@@ -1,5 +1,8 @@
1
1
  #!/usr/bin/env node
2
2
 
3
+ // Initialize Sentry first, before any other modules
4
+ const sentry = require("../lib/sentry");
5
+
3
6
  // Set process priority if possible
4
7
  const os = require("os");
5
8
  try {
@@ -10,5 +13,10 @@ try {
10
13
  // Ignore if not permitted
11
14
  }
12
15
 
16
+ // Ensure Sentry flushes on exit
17
+ process.on("beforeExit", async () => {
18
+ await sentry.flush();
19
+ });
20
+
13
21
  // Run the CLI
14
22
  require("../interfaces/cli.js");
@@ -307,9 +307,46 @@
307
307
  text-align: center;
308
308
  user-select: none;
309
309
  }
310
+
311
+ .close-button {
312
+ position: fixed;
313
+ top: 12px;
314
+ right: 12px;
315
+ z-index: 100;
316
+ background: rgba(0, 0, 0, 0.8);
317
+ border: 1px solid #444;
318
+ color: #fff;
319
+ padding: 8px 16px;
320
+ border-radius: 6px;
321
+ cursor: pointer;
322
+ font-size: 13px;
323
+ font-weight: 500;
324
+ pointer-events: auto;
325
+ transition: all 0.2s ease;
326
+ display: flex;
327
+ align-items: center;
328
+ gap: 6px;
329
+ }
330
+
331
+ .close-button:hover {
332
+ background: rgba(220, 53, 69, 0.9);
333
+ border-color: #dc3545;
334
+ }
335
+
336
+ .close-button svg {
337
+ width: 14px;
338
+ height: 14px;
339
+ fill: currentColor;
340
+ }
310
341
  </style>
311
342
  </head>
312
343
  <body>
344
+ <!-- Close window button -->
345
+ <button class="close-button" onclick="window.close()" title="Close this window">
346
+ <svg viewBox="0 0 24 24"><path d="M19 6.41L17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
347
+ Close
348
+ </button>
349
+
313
350
  <!-- Loading screen -->
314
351
  <div class="loading-screen" id="loading-screen">
315
352
  <div class="testdriver-logo">
@@ -44,7 +44,6 @@ This system provides comprehensive test execution tracking, linking test runs wi
44
44
  │ │ │ │
45
45
  │ │ • TdTestRun │ │
46
46
  │ │ • TdTestCase │ │
47
- │ │ • TdSandbox │ │
48
47
  │ │ • Replay │ │
49
48
  │ └────────────────┘ │
50
49
  │ │
@@ -95,7 +94,6 @@ Represents a complete test suite execution (e.g., `npx vitest run`).
95
94
 
96
95
  **Relationships:**
97
96
  - `team`: Owner team
98
- - `sandbox`: TdSandbox where tests ran
99
97
  - `testCases`: Collection of TdTestCase
100
98
  - `replays`: Associated Replay records
101
99
 
@@ -114,36 +112,13 @@ Represents an individual test within a test run.
114
112
 
115
113
  **Relationships:**
116
114
  - `testRun`: Parent TdTestRun
117
- - `replay`: Associated Replay record
118
-
119
- ### TdSandbox
120
- Represents a spawned VM/sandbox instance.
121
-
122
- **Key Fields:**
123
- - `sandboxId`: Unique identifier
124
- - `platform`: windows | mac | linux
125
- - `status`: provisioning | ready | running | stopped | terminated
126
- - `instanceId`, `instanceType`: AWS EC2 details
127
- - `ipAddress`, `vncUrl`, `wsUrl`: Connection details
128
- - `spawnTime`, `readyTime`, `terminateTime`: Lifecycle timestamps
129
- - `dashcamAuth`: Whether dashcam was authenticated
130
- - `dashcamProjectId`: Dashcam project for replays
131
-
132
- **Relationships:**
133
- - `team`: Owner team
134
- - `user`: User who spawned it
135
- - `testRuns`: Tests that ran on this sandbox
136
- - `replays`: Dashcam recordings from this sandbox
137
-
138
- **Note:** Sandbox creation/updates happen via WebSocket (not REST API) as part of the sandbox provisioning flow.
139
-
115
+ - `replay`: Associated Replay record
140
116
  ### Replay (Extended)
141
117
  Existing model extended with test run associations.
142
118
 
143
119
  **New Fields:**
144
120
  - `tdTestRun`: Associated test run
145
121
  - `tdTestCase`: Associated test case
146
- - `tdSandbox`: Sandbox where recorded
147
122
 
148
123
  ## API Endpoints
149
124
 
@@ -185,7 +185,6 @@ await client.completeTestRun({
185
185
  **API (Backend)**
186
186
  - `api/models/TdTestRun.js` - Test run model
187
187
  - `api/models/TdTestCase.js` - Test case model
188
- - `api/models/TdSandbox.js` - Sandbox tracking model
189
188
  - `api/controllers/testdriver/testdriver-test-run-create.js` - Create test run endpoint
190
189
  - `api/controllers/testdriver/testdriver-test-run-complete.js` - Complete test run endpoint
191
190
  - `api/controllers/testdriver/testdriver-test-case-create.js` - Record test case endpoint
@@ -376,12 +376,6 @@ const run2 = await client.createTestRun({
376
376
  - Associated dashcam replay
377
377
  - Timing and duration
378
378
 
379
- ### TdSandbox
380
- - VM/sandbox lifecycle tracking
381
- - Platform and OS information
382
- - Dashcam integration status
383
- - Cost and usage metrics
384
-
385
379
  ### Replay
386
380
  - Dashcam recordings
387
381
  - Linked to test runs and cases
@@ -3,6 +3,7 @@ title: "act()"
3
3
  sidebarTitle: "act"
4
4
  description: "Execute natural language tasks using AI"
5
5
  icon: "wand-magic-sparkles"
6
+ tag: beta
6
7
  ---
7
8
 
8
9
  ## Overview
@@ -24,6 +24,7 @@ class InitCommand extends BaseCommand {
24
24
 
25
25
  console.log(chalk.green("\n✅ Project initialized successfully!\n"));
26
26
  this.printNextSteps();
27
+ process.exit(0);
27
28
  }
28
29
 
29
30
  /**
@@ -79,28 +80,41 @@ class InitCommand extends BaseCommand {
79
80
  */
80
81
  async promptHidden(question) {
81
82
  return new Promise((resolve) => {
82
- const rl = readline.createInterface({
83
- input: process.stdin,
84
- output: process.stdout,
85
- });
86
-
87
- // Mute output to hide the input
83
+ process.stdout.write(question);
84
+
88
85
  const stdin = process.stdin;
89
- const muted = {
90
- write: () => {},
86
+ const wasRaw = stdin.isRaw;
87
+ stdin.setRawMode(true);
88
+ stdin.resume();
89
+ stdin.setEncoding("utf8");
90
+
91
+ let input = "";
92
+
93
+ const onData = (char) => {
94
+ // Handle Ctrl+C
95
+ if (char === "\u0003") {
96
+ stdin.setRawMode(wasRaw);
97
+ process.exit();
98
+ }
99
+ // Handle Enter
100
+ if (char === "\r" || char === "\n") {
101
+ stdin.setRawMode(wasRaw);
102
+ stdin.removeListener("data", onData);
103
+ stdin.pause();
104
+ console.log(""); // New line after hidden input
105
+ resolve(input);
106
+ return;
107
+ }
108
+ // Handle Backspace
109
+ if (char === "\u007F" || char === "\b") {
110
+ input = input.slice(0, -1);
111
+ return;
112
+ }
113
+ // Add character to input (but don't echo it)
114
+ input += char;
91
115
  };
92
116
 
93
- rl.question(question, (answer) => {
94
- rl.close();
95
- stdin.removeListener("data", muted.write);
96
- console.log(""); // New line after hidden input
97
- resolve(answer);
98
- });
99
-
100
- // Mute stdin to hide input
101
- stdin.on("data", (char) => {
102
- // Don't write to output (hides the input)
103
- });
117
+ stdin.on("data", onData);
104
118
  });
105
119
  }
106
120
 
@@ -23,6 +23,7 @@ async function openBrowser(url) {
23
23
  await open(url, {
24
24
  // Wait for the app to open
25
25
  wait: false,
26
+ background: true
26
27
  });
27
28
  } catch (error) {
28
29
  console.error("Failed to open browser automatically:", error);
@@ -131,9 +132,32 @@ class BaseCommand extends Command {
131
132
  }
132
133
 
133
134
  this.agent.emitter.on("exit", (exitCode) => {
135
+ // Ensure sandbox is closed before exiting
136
+ if (this.agent?.sandbox) {
137
+ try {
138
+ this.agent.sandbox.close();
139
+ } catch (err) {
140
+ // Ignore close errors
141
+ }
142
+ }
134
143
  process.exit(exitCode);
135
144
  });
136
145
 
146
+ // Handle process signals to ensure clean disconnection
147
+ const cleanupAndExit = () => {
148
+ if (this.agent?.sandbox) {
149
+ try {
150
+ this.agent.sandbox.close();
151
+ } catch (err) {
152
+ // Ignore close errors
153
+ }
154
+ }
155
+ process.exit(1);
156
+ };
157
+
158
+ process.on('SIGINT', cleanupAndExit);
159
+ process.on('SIGTERM', cleanupAndExit);
160
+
137
161
  // Handle unhandled promise rejections to prevent them from interfering with the exit flow
138
162
  // This is particularly important when JavaScript execution in VM contexts leaves dangling promises
139
163
  process.on("unhandledRejection", (reason) => {
package/interfaces/cli.js CHANGED
@@ -1,13 +1,20 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  const { run } = require("@oclif/core");
4
+ const sentry = require("../lib/sentry");
4
5
 
5
6
  // Run oclif (with default command handling built-in)
6
7
  run()
7
8
  .then(() => {
8
9
  // Success
9
10
  })
10
- .catch((error) => {
11
+ .catch(async (error) => {
12
+ // Capture error in Sentry
13
+ sentry.captureException(error, {
14
+ tags: { component: "cli-init" },
15
+ });
16
+ await sentry.flush();
17
+
11
18
  console.error("Failed to start TestDriver.ai agent:", error);
12
19
  process.exit(1);
13
20
  });
@@ -300,6 +300,9 @@ marked.use(
300
300
  );
301
301
 
302
302
  const createMarkdownLogger = (emitter) => {
303
+ // Prefix prepended to each line of streamed markdown output (currently empty, so no indent is applied)
304
+ const streamIndent = "";
305
+
303
306
  const markedParsePartial = (markdown, start = 0, end = 0) => {
304
307
  let result = markdown.trimEnd().split("\n").slice(start, end);
305
308
  if (end <= 0) {
@@ -307,7 +310,8 @@ const createMarkdownLogger = (emitter) => {
307
310
  }
308
311
  result = result.join("\n");
309
312
 
310
- return marked.parse(result).replace(/^/gm, spaceChar).trimEnd();
313
+ // Use streamIndent for streaming output to make it visually scoped
314
+ return marked.parse(result).replace(/^/gm, streamIndent).trimEnd();
311
315
  };
312
316
 
313
317
  // Event-based markdown streaming with buffering
@@ -360,7 +364,8 @@ const createMarkdownLogger = (emitter) => {
360
364
  diff = censorSensitiveDataDeep(diff);
361
365
  process.stdout.write(diff);
362
366
  }
363
- process.stdout.write("\n\n");
367
+ // Use console.log for the final newlines so it gets captured by vitest
368
+ console.log("");
364
369
 
365
370
  // Clean up the stream
366
371
  activeStreams.delete(streamId);
@@ -384,7 +389,7 @@ const createMarkdownLogger = (emitter) => {
384
389
  });
385
390
  };
386
391
 
387
- const spaceChar = " ";
392
+ const spaceChar = " ";
388
393
 
389
394
  module.exports = {
390
395
  logger,