testdriverai 7.8.0 → 7.9.0-test.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/agent/index.js +12 -0
  2. package/agent/lib/http.js +21 -3
  3. package/agent/lib/logger.js +15 -0
  4. package/agent/lib/provision-commands.js +176 -0
  5. package/agent/lib/sandbox.js +667 -118
  6. package/agent/lib/sdk.js +1 -20
  7. package/ai/skills/testdriver-find/SKILL.md +14 -20
  8. package/docs/_data/examples-manifest.json +46 -46
  9. package/docs/_scripts/extract-example-urls.js +67 -72
  10. package/docs/docs.json +2 -1
  11. package/docs/v7/examples/ai.mdx +1 -1
  12. package/docs/v7/examples/assert.mdx +1 -1
  13. package/docs/v7/examples/captcha-api.mdx +1 -1
  14. package/docs/v7/examples/chrome-extension.mdx +1 -1
  15. package/docs/v7/examples/drag-and-drop.mdx +1 -1
  16. package/docs/v7/examples/element-not-found.mdx +1 -1
  17. package/docs/v7/examples/exec-output.mdx +1 -1
  18. package/docs/v7/examples/exec-pwsh.mdx +1 -1
  19. package/docs/v7/examples/focus-window.mdx +1 -1
  20. package/docs/v7/examples/hover-image.mdx +1 -1
  21. package/docs/v7/examples/hover-text.mdx +1 -1
  22. package/docs/v7/examples/installer.mdx +1 -1
  23. package/docs/v7/examples/launch-vscode-linux.mdx +1 -1
  24. package/docs/v7/examples/match-image.mdx +1 -1
  25. package/docs/v7/examples/press-keys.mdx +1 -1
  26. package/docs/v7/examples/scroll-keyboard.mdx +1 -1
  27. package/docs/v7/examples/scroll-until-image.mdx +1 -1
  28. package/docs/v7/examples/scroll-until-text.mdx +1 -1
  29. package/docs/v7/examples/scroll.mdx +1 -1
  30. package/docs/v7/examples/type.mdx +1 -1
  31. package/docs/v7/examples/windows-installer.mdx +1 -1
  32. package/docs/v7/find.mdx +14 -20
  33. package/docs/v7/test-results-json.mdx +258 -0
  34. package/examples/scroll-keyboard.test.mjs +1 -1
  35. package/examples/scroll.test.mjs +1 -12
  36. package/interfaces/vitest-plugin.mjs +167 -51
  37. package/lib/core/Dashcam.js +16 -22
  38. package/lib/environments.json +8 -4
  39. package/lib/github-comment.mjs +58 -40
  40. package/lib/init-project.js +5 -67
  41. package/lib/resolve-channel.js +39 -10
  42. package/lib/sentry.js +47 -23
  43. package/lib/vitest/hooks.mjs +117 -20
  44. package/manual/exec-stream-logs.test.mjs +25 -0
  45. package/mcp-server/dist/server.mjs +28 -8
  46. package/mcp-server/src/server.ts +31 -8
  47. package/package.json +2 -1
  48. package/sdk.d.ts +4 -0
  49. package/sdk.js +42 -12
  50. package/setup/aws/install-dev-runner.sh +79 -0
  51. package/setup/aws/spawn-runner.sh +165 -0
  52. package/test-sentry-span.js +35 -0
  53. package/vitest.config.mjs +7 -3
  54. package/vitest.runner.config.mjs +33 -0
  55. package/docs/v7/_drafts/core.mdx +0 -458
package/lib/vitest/hooks.mjs CHANGED
@@ -42,14 +42,14 @@ function checkVitestVersion() {
42
42
  if (major < MINIMUM_VITEST_VERSION) {
43
43
  throw new Error(
44
44
  `TestDriver requires Vitest >= ${MINIMUM_VITEST_VERSION}.0.0, but found ${version}. ` +
45
- `Please upgrade Vitest: npm install vitest@latest`,
45
+ `Please upgrade Vitest: npm install vitest@latest`,
46
46
  );
47
47
  }
48
48
  } catch (err) {
49
49
  if (err.code === "MODULE_NOT_FOUND") {
50
50
  throw new Error(
51
51
  "TestDriver requires Vitest to be installed. " +
52
- "Please install it: npm install vitest@latest",
52
+ "Please install it: npm install vitest@latest",
53
53
  );
54
54
  }
55
55
  throw err;
@@ -181,18 +181,33 @@ function setupConsoleSpy(client, taskId) {
181
181
 
182
182
  /**
183
183
  * Unregister a client so its sandbox no longer receives forwarded logs.
184
- * When the last client is removed we restore the original console methods so
185
- * the Vitest worker fork can exit cleanly (unreleased vi.spyOn mocks prevent
186
- * the worker from shutting down, producing "Worker exited unexpectedly").
187
- * If another test starts later (e.g. a retry), installConsoleSpy() will
188
- * re-install the spy on demand.
184
+ *
185
+ * Between sequential `it()` blocks we intentionally keep the spies installed.
186
+ * The `bufferConsoleToClients` function is a no-op when `activeClients` is
187
+ * empty, so leaving the spy in place is harmless and avoids a non-atomic
188
+ * restore/re-install race that can corrupt console method references.
189
+ *
190
+ * Spies are torn down once at process exit so the Vitest worker fork can
191
+ * shut down cleanly (unreleased vi.spyOn mocks prevent exit).
192
+ *
189
193
  * @param {import('../../sdk.js').default} client - TestDriver client instance
190
194
  */
191
195
  function cleanupConsoleSpy(client) {
192
196
  _consoleSpy.activeClients.delete(client);
193
197
 
194
- // Restore spies when no tests need them — allows clean worker exit
195
- if (_consoleSpy.activeClients.size === 0 && _consoleSpy.spies) {
198
+ if (debugConsoleSpy) {
199
+ process.stdout.write(
200
+ `[DEBUG cleanupConsoleSpy] clients remaining: ${_consoleSpy.activeClients.size}\n`,
201
+ );
202
+ }
203
+ }
204
+
205
+ /**
206
+ * Tear down the singleton console spy completely.
207
+ * Called once on process exit so the Vitest worker can shut down cleanly.
208
+ */
209
+ function teardownConsoleSpy() {
210
+ if (_consoleSpy.spies) {
196
211
  _consoleSpy.spies.log.mockRestore();
197
212
  _consoleSpy.spies.error.mockRestore();
198
213
  _consoleSpy.spies.warn.mockRestore();
@@ -202,21 +217,26 @@ function cleanupConsoleSpy(client) {
202
217
  _consoleSpy.installed = false;
203
218
 
204
219
  if (debugConsoleSpy) {
205
- process.stdout.write("[DEBUG cleanupConsoleSpy] All spies restored\n");
220
+ process.stdout.write("[DEBUG teardownConsoleSpy] All spies restored\n");
206
221
  }
207
222
  }
208
-
209
- if (debugConsoleSpy) {
210
- process.stdout.write(
211
- `[DEBUG cleanupConsoleSpy] clients remaining: ${_consoleSpy.activeClients.size}\n`,
212
- );
213
- }
214
223
  }
215
224
 
225
+ // Restore console spies on process exit so the Vitest worker can exit cleanly
226
+ process.on("exit", teardownConsoleSpy);
227
+
216
228
  // Weak maps to store instances per test context
217
229
  const testDriverInstances = new WeakMap();
218
230
  const lifecycleHandlers = new WeakMap();
219
231
 
232
+ /**
233
+ * Module-level promise tracking the most recent test's disconnect.
234
+ * When sequential `it()` blocks run, the next test awaits this promise
235
+ * before connecting — ensuring the previous sandbox is fully torn down
236
+ * even if the cleanup's disconnect timeout fired early.
237
+ */
238
+ let _pendingDisconnect = null;
239
+
220
240
  /**
221
241
  * Upload buffered SDK + console logs directly to S3 via the existing Log system.
222
242
  * Extracts the replayId from the dashcam URL, calls POST /api/v1/logs to create
@@ -403,6 +423,11 @@ export function TestDriver(context, options = {}) {
403
423
  config.apiRoot = process.env.TD_API_ROOT;
404
424
  }
405
425
 
426
+ // Use TD_E2B_TEMPLATE_ID from environment if not provided in config
427
+ if (!config.e2bTemplateId && process.env.TD_E2B_TEMPLATE_ID) {
428
+ config.e2bTemplateId = process.env.TD_E2B_TEMPLATE_ID;
429
+ }
430
+
406
431
  const testdriver = new TestDriverSDK(apiKey, config);
407
432
  testdriver.__vitestContext = context.task;
408
433
  testdriver._debugOnFailure = mergedOptions.debugOnFailure || false;
@@ -431,6 +456,14 @@ export function TestDriver(context, options = {}) {
431
456
  const debugConsoleSpy = process.env.TD_DEBUG_CONSOLE_SPY === "true";
432
457
 
433
458
  testdriver.__connectionPromise = (async () => {
459
+ // Wait for any previous test's disconnect to fully complete.
460
+ // This prevents the new sandbox connection from racing with a
461
+ // lingering disconnect when sequential `it()` blocks run.
462
+ if (_pendingDisconnect) {
463
+ await _pendingDisconnect.catch(() => {});
464
+ _pendingDisconnect = null;
465
+ }
466
+
434
467
  if (debugConsoleSpy) {
435
468
  console.log(
436
469
  "[DEBUG] Before auth - sandbox.instanceSocketConnected:",
@@ -644,15 +677,79 @@ export function TestDriver(context, options = {}) {
644
677
  // Clean up console spies
645
678
  cleanupConsoleSpy(currentInstance);
646
679
 
680
+ // Build test result metadata for JSON report output
681
+ {
682
+ const sdkPkg = require("../../package.json");
683
+ const inst = currentInstance.getInstance?.() || {};
684
+ const sbx = currentInstance.sandbox || {};
685
+ const apiRoot = currentInstance.config?.TD_API_ROOT || null;
686
+
687
+ context.task.meta.testResult = {
688
+ // Versions
689
+ sdkVersion: sdkPkg.version || null,
690
+ apiVersion: currentInstance._apiVersion || null,
691
+ runnerVersionBefore: inst.runnerVersionBefore || null,
692
+ runnerVersionAfter: inst.runnerVersionAfter || null,
693
+ wasUpdated: inst.wasUpdated || false,
694
+
695
+ // URLs
696
+ apiUrl: apiRoot,
697
+ vncUrl: inst.vncUrl || inst.url || null,
698
+
699
+ // Dates
700
+ date: new Date().toISOString(),
701
+
702
+ // Team / session
703
+ teamId: sbx._teamId || null,
704
+ sessionId: currentInstance.getSessionId?.() || null,
705
+
706
+ // Test info
707
+ testFile: context.task.meta.testFile || null,
708
+ testName: context.task.name || null,
709
+ suiteName: context.task.suite?.name || null,
710
+
711
+ // Test result
712
+ testPassed: context.task.result?.state === "pass",
713
+ error: context.task.result?.errors?.[0]?.message || null,
714
+ errorStack: context.task.result?.errors?.[0]?.stack || null,
715
+
716
+ // Infrastructure
717
+ sandboxId: inst.sandboxId || inst.instanceId || null,
718
+ instanceId: inst.instanceId || null,
719
+ os: currentInstance.os || inst.os || null,
720
+ amiId: inst.amiId || null,
721
+ e2bTemplateId: inst.e2bTemplateId || null,
722
+ imageVersion: inst.imageVersion || null,
723
+
724
+ // Realtime
725
+ realtimeChannel: inst.channelName || sbx._channelName || null,
726
+ realtimeMessageCount: typeof sbx.getPublishCount === "function" ? sbx.getPublishCount() : 0,
727
+
728
+ // Interactions
729
+ interactions: currentInstance._interactionStats
730
+ ? { ...currentInstance._interactionStats, byType: { ...currentInstance._interactionStats.byType } }
731
+ : { total: 0, cached: 0, byType: {} },
732
+ };
733
+ }
734
+
647
735
  // Wait for connection to finish if it was initiated
648
736
  if (currentInstance.__connectionPromise) {
649
- await currentInstance.__connectionPromise.catch(() => {}); // Ignore connection errors during cleanup
737
+ await currentInstance.__connectionPromise.catch(() => { }); // Ignore connection errors during cleanup
650
738
  }
651
739
 
652
- // Disconnect with timeout
740
+ // Disconnect — track the promise at module level so the *next* test
741
+ // can await it before connecting, even if the timeout fires first.
742
+ const disconnectPromise = currentInstance.disconnect().catch((err) => {
743
+ console.error("Error during disconnect:", err);
744
+ });
745
+ _pendingDisconnect = disconnectPromise;
746
+
747
+ // Allow up to 30 s for Ably presence leave / channel detach.
748
+ // If it takes longer, cleanup resolves but _pendingDisconnect
749
+ // keeps the reference so the next test still waits.
653
750
  await Promise.race([
654
- currentInstance.disconnect(),
655
- new Promise((resolve) => setTimeout(resolve, 5000)), // 5s timeout for disconnect
751
+ disconnectPromise,
752
+ new Promise((resolve) => setTimeout(resolve, 30000)),
656
753
  ]);
657
754
  } catch (error) {
658
755
  console.error("Error disconnecting client:", error);
package/manual/exec-stream-logs.test.mjs CHANGED
@@ -0,0 +1,25 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { TestDriver } from "../lib/vitest/hooks.mjs";
3
+ import { getDefaults } from "../examples/config.mjs";
4
+
5
+ describe("Exec Log Streaming", () => {
6
+ it("should stream exec logs every second for 20 seconds", async (context) => {
7
+ const testdriver = TestDriver(context, { ...getDefaults(context), headless: true });
8
+ await testdriver.provision.chrome({ url: "about:blank" });
9
+
10
+ const code = `for i in $(seq 1 20); do echo "log line $i at $(date +%T)"; sleep 1; done`;
11
+
12
+ const result = await testdriver.exec({
13
+ language: "sh",
14
+ code,
15
+ timeout: 30000,
16
+ });
17
+
18
+ console.log("exec result:", result);
19
+
20
+ // Verify we got all 20 log lines
21
+ for (let i = 1; i <= 20; i++) {
22
+ expect(result).toContain(`log line ${i}`);
23
+ }
24
+ });
25
+ });
package/mcp-server/dist/server.mjs CHANGED
@@ -26,18 +26,33 @@ import { sessionManager } from "./session.js";
26
26
  const sdkRoot = path.join(path.dirname(fileURLToPath(import.meta.url)), "..", "..");
27
27
  const packageJson = JSON.parse(fs.readFileSync(path.join(sdkRoot, "package.json"), "utf-8"));
28
28
  const version = packageJson.version || "1.0.0";
29
- // Derive release channel from package version prerelease tag (e.g. "7.6.0-test.5" → "test")
29
+ // Derive release channel and infrastructure environment from package version
30
30
  import semver from "semver";
31
- const KNOWN_CHANNELS = new Set(["dev", "test", "canary", "latest"]);
32
- function resolveReleaseChannel(ver) {
33
- if (process.env.TD_CHANNEL && KNOWN_CHANNELS.has(process.env.TD_CHANNEL))
31
+ const CHANNEL_TO_ENV = {
32
+ dev: "dev",
33
+ test: "staging",
34
+ canary: "production",
35
+ stable: "production",
36
+ };
37
+ const VALID_CHANNELS = new Set(Object.keys(CHANNEL_TO_ENV));
38
+ const VALID_ENVS = new Set(["dev", "staging", "production"]);
39
+ function resolveChannel(ver) {
40
+ if (process.env.TD_CHANNEL && VALID_CHANNELS.has(process.env.TD_CHANNEL))
34
41
  return process.env.TD_CHANNEL;
42
+ if (process.env.TD_ENV && VALID_CHANNELS.has(process.env.TD_ENV))
43
+ return process.env.TD_ENV;
35
44
  const pre = semver.prerelease(ver);
36
- if (pre && pre.length > 0 && KNOWN_CHANNELS.has(String(pre[0])))
45
+ if (pre && pre.length > 0 && VALID_CHANNELS.has(String(pre[0])))
37
46
  return String(pre[0]);
38
- return "latest";
47
+ return "stable";
48
+ }
49
+ function resolveSentryEnvironment(ver) {
50
+ if (process.env.TD_ENV && VALID_ENVS.has(process.env.TD_ENV))
51
+ return process.env.TD_ENV;
52
+ return CHANNEL_TO_ENV[resolveChannel(ver)] || "production";
39
53
  }
40
- const releaseChannel = resolveReleaseChannel(version);
54
+ const activeChannel = resolveChannel(version);
55
+ const sentryEnvironment = resolveSentryEnvironment(version);
41
56
  const isSentryEnabled = () => {
42
57
  if (process.env.TD_TELEMETRY === "false") {
43
58
  return false;
@@ -49,7 +64,7 @@ if (isSentryEnabled()) {
49
64
  Sentry.init({
50
65
  dsn: process.env.SENTRY_DSN ||
51
66
  "https://452bd5a00dbd83a38ee8813e11c57694@o4510262629236736.ingest.us.sentry.io/4510480443637760",
52
- environment: releaseChannel,
67
+ environment: sentryEnvironment,
53
68
  release: version,
54
69
  sampleRate: 1.0,
55
70
  tracesSampleRate: 1.0,
@@ -57,6 +72,7 @@ if (isSentryEnabled()) {
57
72
  integrations: [Sentry.httpIntegration(), Sentry.nodeContextIntegration()],
58
73
  initialScope: {
59
74
  tags: {
75
+ channel: activeChannel,
60
76
  platform: os.platform(),
61
77
  arch: os.arch(),
62
78
  nodeVersion: process.version,
@@ -84,6 +100,10 @@ if (isSentryEnabled()) {
84
100
  if (error && typeof error === "object" && "name" in error && error.name === "TestFailure") {
85
101
  return null;
86
102
  }
103
+ // Filter out ElementNotFoundError - expected test outcome, not a crash
104
+ if (error && typeof error === "object" && "name" in error && error.name === "ElementNotFoundError") {
105
+ return null;
106
+ }
87
107
  return event;
88
108
  },
89
109
  });
package/mcp-server/src/server.ts CHANGED
@@ -34,16 +34,33 @@ const sdkRoot = path.join(path.dirname(fileURLToPath(import.meta.url)), "..", ".
34
34
  const packageJson = JSON.parse(fs.readFileSync(path.join(sdkRoot, "package.json"), "utf-8"));
35
35
  const version = packageJson.version || "1.0.0";
36
36
 
37
- // Derive release channel from package version prerelease tag (e.g. "7.6.0-test.5" → "test")
37
+ // Derive release channel and infrastructure environment from package version
38
38
  import semver from "semver";
39
- const KNOWN_CHANNELS = new Set(["dev", "test", "canary", "latest"]);
40
- function resolveReleaseChannel(ver: string): string {
41
- if (process.env.TD_CHANNEL && KNOWN_CHANNELS.has(process.env.TD_CHANNEL)) return process.env.TD_CHANNEL;
39
+
40
+ const CHANNEL_TO_ENV: Record<string, string> = {
41
+ dev: "dev",
42
+ test: "staging",
43
+ canary: "production",
44
+ stable: "production",
45
+ };
46
+ const VALID_CHANNELS = new Set(Object.keys(CHANNEL_TO_ENV));
47
+ const VALID_ENVS = new Set(["dev", "staging", "production"]);
48
+
49
+ function resolveChannel(ver: string): string {
50
+ if (process.env.TD_CHANNEL && VALID_CHANNELS.has(process.env.TD_CHANNEL)) return process.env.TD_CHANNEL;
51
+ if (process.env.TD_ENV && VALID_CHANNELS.has(process.env.TD_ENV)) return process.env.TD_ENV;
42
52
  const pre = semver.prerelease(ver);
43
- if (pre && pre.length > 0 && KNOWN_CHANNELS.has(String(pre[0]))) return String(pre[0]);
44
- return "latest";
53
+ if (pre && pre.length > 0 && VALID_CHANNELS.has(String(pre[0]))) return String(pre[0]);
54
+ return "stable";
55
+ }
56
+
57
+ function resolveSentryEnvironment(ver: string): string {
58
+ if (process.env.TD_ENV && VALID_ENVS.has(process.env.TD_ENV)) return process.env.TD_ENV;
59
+ return CHANNEL_TO_ENV[resolveChannel(ver)] || "production";
45
60
  }
46
- const releaseChannel = resolveReleaseChannel(version);
61
+
62
+ const activeChannel = resolveChannel(version);
63
+ const sentryEnvironment = resolveSentryEnvironment(version);
47
64
 
48
65
  const isSentryEnabled = () => {
49
66
  if (process.env.TD_TELEMETRY === "false") {
@@ -58,7 +75,7 @@ if (isSentryEnabled()) {
58
75
  dsn:
59
76
  process.env.SENTRY_DSN ||
60
77
  "https://452bd5a00dbd83a38ee8813e11c57694@o4510262629236736.ingest.us.sentry.io/4510480443637760",
61
- environment: releaseChannel,
78
+ environment: sentryEnvironment,
62
79
  release: version,
63
80
  sampleRate: 1.0,
64
81
  tracesSampleRate: 1.0,
@@ -66,6 +83,7 @@ if (isSentryEnabled()) {
66
83
  integrations: [Sentry.httpIntegration(), Sentry.nodeContextIntegration()],
67
84
  initialScope: {
68
85
  tags: {
86
+ channel: activeChannel,
69
87
  platform: os.platform(),
70
88
  arch: os.arch(),
71
89
  nodeVersion: process.version,
@@ -99,6 +117,11 @@ if (isSentryEnabled()) {
99
117
  if (error && typeof error === "object" && "name" in error && (error as { name: string }).name === "TestFailure") {
100
118
  return null;
101
119
  }
120
+
121
+ // Filter out ElementNotFoundError - expected test outcome, not a crash
122
+ if (error && typeof error === "object" && "name" in error && (error as { name: string }).name === "ElementNotFoundError") {
123
+ return null;
124
+ }
102
125
 
103
126
  return event;
104
127
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "testdriverai",
3
- "version": "7.8.0",
3
+ "version": "7.9.0-test.1",
4
4
  "description": "Next generation autonomous AI agent for end-to-end testing of web & desktop",
5
5
  "main": "sdk.js",
6
6
  "types": "sdk.d.ts",
@@ -116,6 +116,7 @@
116
116
  },
117
117
  "overrides": {
118
118
  "glob": "^11.0.1",
119
+ "obug": "2.1.1",
119
120
  "rimraf": "^5.0.10"
120
121
  },
121
122
  "peerDependencies": {
package/sdk.d.ts CHANGED
@@ -273,6 +273,8 @@ export interface TestDriverOptions {
273
273
  sandboxAmi?: string;
274
274
  /** EC2 instance type for sandbox (e.g., 'i3.metal') */
275
275
  sandboxInstance?: string;
276
+ /** E2B template ID to use when creating the sandbox (e.g., 'my-template-id') */
277
+ e2bTemplateId?: string;
276
278
  /** Cache key for element finding operations. If provided, enables caching tied to this key */
277
279
  cacheKey?: string;
278
280
  /** Reconnect to the last used sandbox instead of creating a new one. When true, provision methods (chrome, vscode, installer, etc.) will be skipped since the application is already running. Throws error if no previous sandbox exists. */
@@ -327,6 +329,8 @@ export interface ConnectOptions {
327
329
  sandboxAmi?: string;
328
330
  /** EC2 instance type for sandbox (e.g., 'i3.metal') */
329
331
  sandboxInstance?: string;
332
+ /** E2B template ID to use when creating the sandbox (e.g., 'my-template-id') */
333
+ e2bTemplateId?: string;
330
334
  /** Operating system for the sandbox (default: 'linux') */
331
335
  os?: "windows" | "linux";
332
336
  /**
package/sdk.js CHANGED
@@ -481,7 +481,7 @@ class Element {
481
481
  let cacheKey = null;
482
482
  let cacheThreshold = null;
483
483
  let perCommandThresholds = null; // Per-command { screen, element } override
484
- let zoom = false; // Default to disabled, enable with zoom: true
484
+ let zoom = true; // Default to enabled
485
485
  let perCommandAi = null; // Per-command AI config override
486
486
 
487
487
  let minConfidence = null; // Minimum confidence threshold
@@ -494,8 +494,8 @@ class Element {
494
494
  // New: options is an object with cacheKey and/or cacheThreshold
495
495
  cacheKey = options.cacheKey || null;
496
496
  cacheThreshold = options.cacheThreshold ?? null;
497
- // zoom defaults to false unless explicitly set to true
498
- zoom = options.zoom === true;
497
+ // zoom defaults to true unless explicitly set to false
498
+ zoom = options.zoom !== false;
499
499
  // Minimum confidence threshold: fail find if AI confidence is below this value
500
500
  minConfidence = options.confidence ?? null;
501
501
  // Element type hint for prompt wrapping
@@ -568,7 +568,7 @@ class Element {
568
568
  cacheKey: cacheKey,
569
569
  os: this.sdk.os,
570
570
  resolution: this.sdk.resolution,
571
- zoom: zoom,
571
+ zoom: zoom === true ? 1 : zoom === false ? 0 : zoom,
572
572
  confidence: minConfidence,
573
573
  type: elementType,
574
574
  ai: {
@@ -623,6 +623,11 @@ class Element {
623
623
 
624
624
  // Track find interaction once at the end (fire-and-forget, don't block)
625
625
  const sessionId = this.sdk.getSessionId();
626
+ const findCacheHit = response?.cacheHit || response?.cache_hit || response?.cached || false;
627
+ // Increment local interaction counters
628
+ this.sdk._interactionStats.total++;
629
+ this.sdk._interactionStats.byType.find = (this.sdk._interactionStats.byType.find || 0) + 1;
630
+ if (findCacheHit) this.sdk._interactionStats.cached++;
626
631
  if (sessionId && this.sdk.apiClient) {
627
632
  this.sdk.apiClient
628
633
  .req("interaction/track", {
@@ -632,11 +637,7 @@ class Element {
632
637
  timestamp: absoluteTimestamp, // Absolute epoch timestamp - frontend calculates relative using clientStartDate
633
638
  success: this._found,
634
639
  error: findError,
635
- cacheHit:
636
- response?.cacheHit ||
637
- response?.cache_hit ||
638
- response?.cached ||
639
- false,
640
+ cacheHit: findCacheHit,
640
641
  selector: response?.selector,
641
642
  selectorUsed: !!response?.selector,
642
643
  confidence: response?.confidence ?? null,
@@ -1498,6 +1499,7 @@ class TestDriverSDK {
1498
1499
  // Store sandbox configuration options
1499
1500
  this.sandboxAmi = options.sandboxAmi || null;
1500
1501
  this.sandboxInstance = options.sandboxInstance || null;
1502
+ this.e2bTemplateId = options.e2bTemplateId || null;
1501
1503
 
1502
1504
  // Store reconnect preference from options
1503
1505
  this.reconnect =
@@ -1616,6 +1618,12 @@ class TestDriverSDK {
1616
1618
  // Uploaded to S3 at cleanup so they can be displayed alongside dashcam replays.
1617
1619
  this._logBuffer = [];
1618
1620
 
1621
+ // API version discovered by _logEnvironmentInfo()
1622
+ this._apiVersion = null;
1623
+
1624
+ // Local interaction counters — incremented at each interaction/track call site
1625
+ this._interactionStats = { total: 0, cached: 0, byType: {} };
1626
+
1619
1627
  // Set up event listeners once (they live for the lifetime of the SDK instance)
1620
1628
  this._setupLogging();
1621
1629
 
@@ -2716,6 +2724,7 @@ CAPTCHA_SOLVER_EOF`,
2716
2724
  * @param {string} options.ip - Direct IP address to connect to
2717
2725
  * @param {string} options.sandboxAmi - AMI to use for the sandbox
2718
2726
  * @param {string} options.sandboxInstance - Instance type for the sandbox
2727
+ * @param {string} options.e2bTemplateId - E2B template ID to use when creating the sandbox
2719
2728
  * @param {string} options.os - Operating system for the sandbox (windows or linux)
2720
2729
  * @param {boolean} options.reuseConnection - Reuse recent connection if available (default: true)
2721
2730
  * @returns {Promise<Object>} Sandbox instance details
@@ -2804,6 +2813,12 @@ CAPTCHA_SOLVER_EOF`,
2804
2813
  } else if (this.sandboxInstance) {
2805
2814
  this.agent.sandboxInstance = this.sandboxInstance;
2806
2815
  }
2816
+ // Use e2bTemplateId from connectOptions if provided, otherwise fall back to constructor value
2817
+ if (connectOptions.e2bTemplateId !== undefined) {
2818
+ this.agent.e2bTemplateId = connectOptions.e2bTemplateId;
2819
+ } else if (this.e2bTemplateId) {
2820
+ this.agent.e2bTemplateId = this.e2bTemplateId;
2821
+ }
2807
2822
  // Use os from connectOptions if provided, otherwise fall back to this.os
2808
2823
  if (connectOptions.os !== undefined) {
2809
2824
  this.agent.sandboxOs = connectOptions.os;
@@ -3193,6 +3208,11 @@ CAPTCHA_SOLVER_EOF`,
3193
3208
 
3194
3209
  // Track successful findAll interaction (fire-and-forget, don't block)
3195
3210
  const sessionId = this.getSessionId();
3211
+ const findAllCacheHit = response.cached || false;
3212
+ // Increment local interaction counters
3213
+ this._interactionStats.total++;
3214
+ this._interactionStats.byType.findAll = (this._interactionStats.byType.findAll || 0) + 1;
3215
+ if (findAllCacheHit) this._interactionStats.cached++;
3196
3216
  if (sessionId && this.apiClient) {
3197
3217
  this.apiClient
3198
3218
  .req("interaction/track", {
@@ -3202,7 +3222,7 @@ CAPTCHA_SOLVER_EOF`,
3202
3222
  timestamp: absoluteTimestamp, // Absolute epoch timestamp - frontend calculates relative using clientStartDate
3203
3223
  success: true,
3204
3224
  input: { count: elements.length },
3205
- cacheHit: response.cached || false,
3225
+ cacheHit: findAllCacheHit,
3206
3226
  selector: response.selector,
3207
3227
  selectorUsed: !!response.selector,
3208
3228
  screenshotUrl: response.screenshotKey ?? null,
@@ -3248,6 +3268,11 @@ CAPTCHA_SOLVER_EOF`,
3248
3268
 
3249
3269
  // No elements found - track interaction (fire-and-forget, don't block)
3250
3270
  const sessionId = this.getSessionId();
3271
+ const noResultCacheHit = response?.cached || false;
3272
+ // Increment local interaction counters
3273
+ this._interactionStats.total++;
3274
+ this._interactionStats.byType.findAll = (this._interactionStats.byType.findAll || 0) + 1;
3275
+ if (noResultCacheHit) this._interactionStats.cached++;
3251
3276
  if (sessionId && this.apiClient) {
3252
3277
  this.apiClient
3253
3278
  .req("interaction/track", {
@@ -3258,7 +3283,7 @@ CAPTCHA_SOLVER_EOF`,
3258
3283
  success: false,
3259
3284
  error: "No elements found",
3260
3285
  input: { count: 0 },
3261
- cacheHit: response?.cached || false,
3286
+ cacheHit: noResultCacheHit,
3262
3287
  selector: response?.selector,
3263
3288
  selectorUsed: !!response?.selector,
3264
3289
  screenshotUrl: response?.screenshotKey ?? null,
@@ -3292,6 +3317,9 @@ CAPTCHA_SOLVER_EOF`,
3292
3317
 
3293
3318
  // Track findAll error interaction (fire-and-forget, don't block)
3294
3319
  const sessionId = this.getSessionId();
3320
+ // Increment local interaction counters
3321
+ this._interactionStats.total++;
3322
+ this._interactionStats.byType.findAll = (this._interactionStats.byType.findAll || 0) + 1;
3295
3323
  if (sessionId && this.apiClient) {
3296
3324
  this.apiClient
3297
3325
  .req("interaction/track", {
@@ -3817,7 +3845,7 @@ CAPTCHA_SOLVER_EOF`,
3817
3845
  const apiRoot = this.config?.TD_API_ROOT || 'unknown';
3818
3846
  const apiKey = this.config?.TD_API_KEY || '';
3819
3847
  const maskedKey = apiKey.length > 4 ? '***' + apiKey.slice(-4) : '(not set)';
3820
- const env = process.env.TD_ENV || 'unknown';
3848
+ const env = process.env.TD_CHANNEL || process.env.TD_ENV || 'unknown';
3821
3849
  const os = this.agent?.options?.os || process.env.TD_OS || 'linux';
3822
3850
  const sdkVersion = require('./package.json').version;
3823
3851
 
@@ -3843,6 +3871,8 @@ CAPTCHA_SOLVER_EOF`,
3843
3871
  res.on('end', () => {
3844
3872
  try {
3845
3873
  const info = JSON.parse(data);
3874
+ // Persist API version for test result metadata
3875
+ this._apiVersion = info.version || null;
3846
3876
  const commit = info.commit || 'unknown';
3847
3877
  const shortCommit = commit.substring(0, 7);
3848
3878
  const commitUrl = commit !== 'unknown'
package/setup/aws/install-dev-runner.sh CHANGED
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ # Usage: ./install-dev-runner.sh <instance-id>
5
+ INSTANCE_ID="${1:?Usage: $0 <instance-id>}"
6
+ AWS_REGION="${AWS_REGION:-us-east-2}"
7
+
8
+ RUNNER_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../runner" && pwd)"
9
+
10
+ echo "Packing local runner..."
11
+ TMPDIR=$(mktemp -d)
12
+ pushd "$RUNNER_DIR" > /dev/null
13
+ npm pack --pack-destination "$TMPDIR" > /dev/null 2>&1
14
+ TARBALL=$(ls "$TMPDIR"/*.tgz)
15
+ popd > /dev/null
16
+ echo "Tarball: $TARBALL"
17
+
18
+ echo "Uploading to S3..."
19
+ S3_KEY="runner-dev/$(date +%s)-$(openssl rand -hex 4)/runner.tgz"
20
+ aws s3 cp "$TARBALL" "s3://v7-transfer/${S3_KEY}" --region "$AWS_REGION" > /dev/null
21
+ DOWNLOAD_URL=$(aws s3 presign "s3://v7-transfer/${S3_KEY}" --expires-in 900 --region "$AWS_REGION")
22
+ rm -rf "$TMPDIR"
23
+
24
+ echo "Creating SSM params file..."
25
+
26
+ # Write Python script to temp file to generate valid JSON
27
+ PYTHON_SCRIPT=$(mktemp --suffix=.py)
28
+ cat > "$PYTHON_SCRIPT" << 'PYEOF'
29
+ import json
30
+ import sys
31
+
32
+ url = sys.argv[1]
33
+
34
+ commands = [
35
+ "Write-Host '=== Stopping runner ==='",
36
+ "Stop-ScheduledTask -TaskName RunTestDriverAgent -ErrorAction SilentlyContinue",
37
+ "Stop-Process -Name node -Force -ErrorAction SilentlyContinue",
38
+ "Start-Sleep -Seconds 2",
39
+ "Set-Location 'C:\\testdriver\\sandbox-agent'",
40
+ "$tarball = 'C:\\Windows\\Temp\\runner-dev.tgz'",
41
+ f"Invoke-WebRequest -Uri '{url}' -OutFile $tarball",
42
+ "Write-Host 'Tarball size:'; (Get-Item $tarball).Length",
43
+ "Remove-Item -Path lib -Recurse -Force -ErrorAction SilentlyContinue",
44
+ "tar -xzf $tarball --strip-components=1 -C .",
45
+ "Get-Content 'package.json' | ConvertFrom-Json | Select-Object -ExpandProperty version",
46
+ "Write-Host '=== Starting runner ==='",
47
+ "Start-ScheduledTask -TaskName RunTestDriverAgent",
48
+ "Start-Sleep -Seconds 3",
49
+ "Get-Content 'C:\\testdriver\\log.txt' -Tail 20"
50
+ ]
51
+
52
+ params = {"commands": commands}
53
+ print(json.dumps(params))
54
+ PYEOF
55
+
56
+ python3 "$PYTHON_SCRIPT" "$DOWNLOAD_URL" > /tmp/ssm-install-params.json
57
+ rm "$PYTHON_SCRIPT"
58
+
59
+ echo "Sending SSM command..."
60
+ CMD_JSON=$(aws ssm send-command \
61
+ --region "$AWS_REGION" \
62
+ --instance-ids "$INSTANCE_ID" \
63
+ --document-name "AWS-RunPowerShellScript" \
64
+ --parameters "file:///tmp/ssm-install-params.json" \
65
+ --output json)
66
+
67
+ COMMAND_ID=$(echo "$CMD_JSON" | jq -r '.Command.CommandId')
68
+ echo "Command ID: $COMMAND_ID"
69
+
70
+ echo "Waiting for completion..."
71
+ aws ssm wait command-executed --region "$AWS_REGION" --command-id "$COMMAND_ID" --instance-id "$INSTANCE_ID" || true
72
+
73
+ echo "Getting output..."
74
+ aws ssm get-command-invocation \
75
+ --region "$AWS_REGION" \
76
+ --command-id "$COMMAND_ID" \
77
+ --instance-id "$INSTANCE_ID" \
78
+ --query 'StandardOutputContent' \
79
+ --output text