testdriverai 7.8.0 → 7.9.0-test.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agent/index.js +12 -0
- package/agent/lib/http.js +21 -3
- package/agent/lib/logger.js +15 -0
- package/agent/lib/provision-commands.js +176 -0
- package/agent/lib/sandbox.js +667 -118
- package/agent/lib/sdk.js +1 -20
- package/ai/skills/testdriver-find/SKILL.md +14 -20
- package/docs/_data/examples-manifest.json +46 -46
- package/docs/_scripts/extract-example-urls.js +67 -72
- package/docs/docs.json +2 -1
- package/docs/v7/examples/ai.mdx +1 -1
- package/docs/v7/examples/assert.mdx +1 -1
- package/docs/v7/examples/captcha-api.mdx +1 -1
- package/docs/v7/examples/chrome-extension.mdx +1 -1
- package/docs/v7/examples/drag-and-drop.mdx +1 -1
- package/docs/v7/examples/element-not-found.mdx +1 -1
- package/docs/v7/examples/exec-output.mdx +1 -1
- package/docs/v7/examples/exec-pwsh.mdx +1 -1
- package/docs/v7/examples/focus-window.mdx +1 -1
- package/docs/v7/examples/hover-image.mdx +1 -1
- package/docs/v7/examples/hover-text.mdx +1 -1
- package/docs/v7/examples/installer.mdx +1 -1
- package/docs/v7/examples/launch-vscode-linux.mdx +1 -1
- package/docs/v7/examples/match-image.mdx +1 -1
- package/docs/v7/examples/press-keys.mdx +1 -1
- package/docs/v7/examples/scroll-keyboard.mdx +1 -1
- package/docs/v7/examples/scroll-until-image.mdx +1 -1
- package/docs/v7/examples/scroll-until-text.mdx +1 -1
- package/docs/v7/examples/scroll.mdx +1 -1
- package/docs/v7/examples/type.mdx +1 -1
- package/docs/v7/examples/windows-installer.mdx +1 -1
- package/docs/v7/find.mdx +14 -20
- package/docs/v7/test-results-json.mdx +258 -0
- package/examples/scroll-keyboard.test.mjs +1 -1
- package/examples/scroll.test.mjs +1 -12
- package/interfaces/vitest-plugin.mjs +167 -51
- package/lib/core/Dashcam.js +16 -22
- package/lib/environments.json +8 -4
- package/lib/github-comment.mjs +58 -40
- package/lib/init-project.js +5 -67
- package/lib/resolve-channel.js +39 -10
- package/lib/sentry.js +47 -23
- package/lib/vitest/hooks.mjs +117 -20
- package/manual/exec-stream-logs.test.mjs +25 -0
- package/mcp-server/dist/server.mjs +28 -8
- package/mcp-server/src/server.ts +31 -8
- package/package.json +2 -1
- package/sdk.d.ts +4 -0
- package/sdk.js +42 -12
- package/setup/aws/install-dev-runner.sh +79 -0
- package/setup/aws/spawn-runner.sh +165 -0
- package/test-sentry-span.js +35 -0
- package/vitest.config.mjs +7 -3
- package/vitest.runner.config.mjs +33 -0
- package/docs/v7/_drafts/core.mdx +0 -458
package/lib/vitest/hooks.mjs
CHANGED
|
@@ -42,14 +42,14 @@ function checkVitestVersion() {
|
|
|
42
42
|
if (major < MINIMUM_VITEST_VERSION) {
|
|
43
43
|
throw new Error(
|
|
44
44
|
`TestDriver requires Vitest >= ${MINIMUM_VITEST_VERSION}.0.0, but found ${version}. ` +
|
|
45
|
-
|
|
45
|
+
`Please upgrade Vitest: npm install vitest@latest`,
|
|
46
46
|
);
|
|
47
47
|
}
|
|
48
48
|
} catch (err) {
|
|
49
49
|
if (err.code === "MODULE_NOT_FOUND") {
|
|
50
50
|
throw new Error(
|
|
51
51
|
"TestDriver requires Vitest to be installed. " +
|
|
52
|
-
|
|
52
|
+
"Please install it: npm install vitest@latest",
|
|
53
53
|
);
|
|
54
54
|
}
|
|
55
55
|
throw err;
|
|
@@ -181,18 +181,33 @@ function setupConsoleSpy(client, taskId) {
|
|
|
181
181
|
|
|
182
182
|
/**
|
|
183
183
|
* Unregister a client so its sandbox no longer receives forwarded logs.
|
|
184
|
-
*
|
|
185
|
-
*
|
|
186
|
-
*
|
|
187
|
-
*
|
|
188
|
-
* re-install
|
|
184
|
+
*
|
|
185
|
+
* Between sequential `it()` blocks we intentionally keep the spies installed.
|
|
186
|
+
* The `bufferConsoleToClients` function is a no-op when `activeClients` is
|
|
187
|
+
* empty, so leaving the spy in place is harmless and avoids a non-atomic
|
|
188
|
+
* restore/re-install race that can corrupt console method references.
|
|
189
|
+
*
|
|
190
|
+
* Spies are torn down once at process exit so the Vitest worker fork can
|
|
191
|
+
* shut down cleanly (unreleased vi.spyOn mocks prevent exit).
|
|
192
|
+
*
|
|
189
193
|
* @param {import('../../sdk.js').default} client - TestDriver client instance
|
|
190
194
|
*/
|
|
191
195
|
function cleanupConsoleSpy(client) {
|
|
192
196
|
_consoleSpy.activeClients.delete(client);
|
|
193
197
|
|
|
194
|
-
|
|
195
|
-
|
|
198
|
+
if (debugConsoleSpy) {
|
|
199
|
+
process.stdout.write(
|
|
200
|
+
`[DEBUG cleanupConsoleSpy] clients remaining: ${_consoleSpy.activeClients.size}\n`,
|
|
201
|
+
);
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
/**
|
|
206
|
+
* Tear down the singleton console spy completely.
|
|
207
|
+
* Called once on process exit so the Vitest worker can shut down cleanly.
|
|
208
|
+
*/
|
|
209
|
+
function teardownConsoleSpy() {
|
|
210
|
+
if (_consoleSpy.spies) {
|
|
196
211
|
_consoleSpy.spies.log.mockRestore();
|
|
197
212
|
_consoleSpy.spies.error.mockRestore();
|
|
198
213
|
_consoleSpy.spies.warn.mockRestore();
|
|
@@ -202,21 +217,26 @@ function cleanupConsoleSpy(client) {
|
|
|
202
217
|
_consoleSpy.installed = false;
|
|
203
218
|
|
|
204
219
|
if (debugConsoleSpy) {
|
|
205
|
-
process.stdout.write("[DEBUG
|
|
220
|
+
process.stdout.write("[DEBUG teardownConsoleSpy] All spies restored\n");
|
|
206
221
|
}
|
|
207
222
|
}
|
|
208
|
-
|
|
209
|
-
if (debugConsoleSpy) {
|
|
210
|
-
process.stdout.write(
|
|
211
|
-
`[DEBUG cleanupConsoleSpy] clients remaining: ${_consoleSpy.activeClients.size}\n`,
|
|
212
|
-
);
|
|
213
|
-
}
|
|
214
223
|
}
|
|
215
224
|
|
|
225
|
+
// Restore console spies on process exit so the Vitest worker can exit cleanly
|
|
226
|
+
process.on("exit", teardownConsoleSpy);
|
|
227
|
+
|
|
216
228
|
// Weak maps to store instances per test context
|
|
217
229
|
const testDriverInstances = new WeakMap();
|
|
218
230
|
const lifecycleHandlers = new WeakMap();
|
|
219
231
|
|
|
232
|
+
/**
|
|
233
|
+
* Module-level promise tracking the most recent test's disconnect.
|
|
234
|
+
* When sequential `it()` blocks run, the next test awaits this promise
|
|
235
|
+
* before connecting — ensuring the previous sandbox is fully torn down
|
|
236
|
+
* even if the cleanup's disconnect timeout fired early.
|
|
237
|
+
*/
|
|
238
|
+
let _pendingDisconnect = null;
|
|
239
|
+
|
|
220
240
|
/**
|
|
221
241
|
* Upload buffered SDK + console logs directly to S3 via the existing Log system.
|
|
222
242
|
* Extracts the replayId from the dashcam URL, calls POST /api/v1/logs to create
|
|
@@ -403,6 +423,11 @@ export function TestDriver(context, options = {}) {
|
|
|
403
423
|
config.apiRoot = process.env.TD_API_ROOT;
|
|
404
424
|
}
|
|
405
425
|
|
|
426
|
+
// Use TD_E2B_TEMPLATE_ID from environment if not provided in config
|
|
427
|
+
if (!config.e2bTemplateId && process.env.TD_E2B_TEMPLATE_ID) {
|
|
428
|
+
config.e2bTemplateId = process.env.TD_E2B_TEMPLATE_ID;
|
|
429
|
+
}
|
|
430
|
+
|
|
406
431
|
const testdriver = new TestDriverSDK(apiKey, config);
|
|
407
432
|
testdriver.__vitestContext = context.task;
|
|
408
433
|
testdriver._debugOnFailure = mergedOptions.debugOnFailure || false;
|
|
@@ -431,6 +456,14 @@ export function TestDriver(context, options = {}) {
|
|
|
431
456
|
const debugConsoleSpy = process.env.TD_DEBUG_CONSOLE_SPY === "true";
|
|
432
457
|
|
|
433
458
|
testdriver.__connectionPromise = (async () => {
|
|
459
|
+
// Wait for any previous test's disconnect to fully complete.
|
|
460
|
+
// This prevents the new sandbox connection from racing with a
|
|
461
|
+
// lingering disconnect when sequential `it()` blocks run.
|
|
462
|
+
if (_pendingDisconnect) {
|
|
463
|
+
await _pendingDisconnect.catch(() => {});
|
|
464
|
+
_pendingDisconnect = null;
|
|
465
|
+
}
|
|
466
|
+
|
|
434
467
|
if (debugConsoleSpy) {
|
|
435
468
|
console.log(
|
|
436
469
|
"[DEBUG] Before auth - sandbox.instanceSocketConnected:",
|
|
@@ -644,15 +677,79 @@ export function TestDriver(context, options = {}) {
|
|
|
644
677
|
// Clean up console spies
|
|
645
678
|
cleanupConsoleSpy(currentInstance);
|
|
646
679
|
|
|
680
|
+
// Build test result metadata for JSON report output
|
|
681
|
+
{
|
|
682
|
+
const sdkPkg = require("../../package.json");
|
|
683
|
+
const inst = currentInstance.getInstance?.() || {};
|
|
684
|
+
const sbx = currentInstance.sandbox || {};
|
|
685
|
+
const apiRoot = currentInstance.config?.TD_API_ROOT || null;
|
|
686
|
+
|
|
687
|
+
context.task.meta.testResult = {
|
|
688
|
+
// Versions
|
|
689
|
+
sdkVersion: sdkPkg.version || null,
|
|
690
|
+
apiVersion: currentInstance._apiVersion || null,
|
|
691
|
+
runnerVersionBefore: inst.runnerVersionBefore || null,
|
|
692
|
+
runnerVersionAfter: inst.runnerVersionAfter || null,
|
|
693
|
+
wasUpdated: inst.wasUpdated || false,
|
|
694
|
+
|
|
695
|
+
// URLs
|
|
696
|
+
apiUrl: apiRoot,
|
|
697
|
+
vncUrl: inst.vncUrl || inst.url || null,
|
|
698
|
+
|
|
699
|
+
// Dates
|
|
700
|
+
date: new Date().toISOString(),
|
|
701
|
+
|
|
702
|
+
// Team / session
|
|
703
|
+
teamId: sbx._teamId || null,
|
|
704
|
+
sessionId: currentInstance.getSessionId?.() || null,
|
|
705
|
+
|
|
706
|
+
// Test info
|
|
707
|
+
testFile: context.task.meta.testFile || null,
|
|
708
|
+
testName: context.task.name || null,
|
|
709
|
+
suiteName: context.task.suite?.name || null,
|
|
710
|
+
|
|
711
|
+
// Test result
|
|
712
|
+
testPassed: context.task.result?.state === "pass",
|
|
713
|
+
error: context.task.result?.errors?.[0]?.message || null,
|
|
714
|
+
errorStack: context.task.result?.errors?.[0]?.stack || null,
|
|
715
|
+
|
|
716
|
+
// Infrastructure
|
|
717
|
+
sandboxId: inst.sandboxId || inst.instanceId || null,
|
|
718
|
+
instanceId: inst.instanceId || null,
|
|
719
|
+
os: currentInstance.os || inst.os || null,
|
|
720
|
+
amiId: inst.amiId || null,
|
|
721
|
+
e2bTemplateId: inst.e2bTemplateId || null,
|
|
722
|
+
imageVersion: inst.imageVersion || null,
|
|
723
|
+
|
|
724
|
+
// Realtime
|
|
725
|
+
realtimeChannel: inst.channelName || sbx._channelName || null,
|
|
726
|
+
realtimeMessageCount: typeof sbx.getPublishCount === "function" ? sbx.getPublishCount() : 0,
|
|
727
|
+
|
|
728
|
+
// Interactions
|
|
729
|
+
interactions: currentInstance._interactionStats
|
|
730
|
+
? { ...currentInstance._interactionStats, byType: { ...currentInstance._interactionStats.byType } }
|
|
731
|
+
: { total: 0, cached: 0, byType: {} },
|
|
732
|
+
};
|
|
733
|
+
}
|
|
734
|
+
|
|
647
735
|
// Wait for connection to finish if it was initiated
|
|
648
736
|
if (currentInstance.__connectionPromise) {
|
|
649
|
-
await currentInstance.__connectionPromise.catch(() => {}); // Ignore connection errors during cleanup
|
|
737
|
+
await currentInstance.__connectionPromise.catch(() => { }); // Ignore connection errors during cleanup
|
|
650
738
|
}
|
|
651
739
|
|
|
652
|
-
// Disconnect
|
|
740
|
+
// Disconnect — track the promise at module level so the *next* test
|
|
741
|
+
// can await it before connecting, even if the timeout fires first.
|
|
742
|
+
const disconnectPromise = currentInstance.disconnect().catch((err) => {
|
|
743
|
+
console.error("Error during disconnect:", err);
|
|
744
|
+
});
|
|
745
|
+
_pendingDisconnect = disconnectPromise;
|
|
746
|
+
|
|
747
|
+
// Allow up to 30 s for Ably presence leave / channel detach.
|
|
748
|
+
// If it takes longer, cleanup resolves but _pendingDisconnect
|
|
749
|
+
// keeps the reference so the next test still waits.
|
|
653
750
|
await Promise.race([
|
|
654
|
-
|
|
655
|
-
new Promise((resolve) => setTimeout(resolve,
|
|
751
|
+
disconnectPromise,
|
|
752
|
+
new Promise((resolve) => setTimeout(resolve, 30000)),
|
|
656
753
|
]);
|
|
657
754
|
} catch (error) {
|
|
658
755
|
console.error("Error disconnecting client:", error);
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { TestDriver } from "../lib/vitest/hooks.mjs";
|
|
3
|
+
import { getDefaults } from "../examples/config.mjs";
|
|
4
|
+
|
|
5
|
+
/**
 * Runs a 20-iteration shell loop through `testdriver.exec` that prints one
 * line per second, then checks that every one of the 20 lines is present in
 * the value returned by `exec`.
 */
describe("Exec Log Streaming", () => {
  it("should stream exec logs every second for 20 seconds", async (context) => {
    const testdriver = TestDriver(context, { ...getDefaults(context), headless: true });
    await testdriver.provision.chrome({ url: "about:blank" });

    // Shell loop: emits "log line <i> at <HH:MM:SS>" once per second, 20 times.
    const code = `for i in $(seq 1 20); do echo "log line $i at $(date +%T)"; sleep 1; done`;

    const result = await testdriver.exec({
      language: "sh",
      code,
      timeout: 30000, // 20 s of sleeps plus headroom
    });

    console.log("exec result:", result);

    // Verify we got all 20 log lines.
    // Match through the trailing " at " so that a short index cannot be
    // satisfied by a longer one: a bare "log line 1" is also a substring of
    // "log line 10".."log line 19", and "log line 2" of "log line 20", which
    // would hide a missing first or second line.
    for (let i = 1; i <= 20; i++) {
      expect(result).toContain(`log line ${i} at `);
    }
  });
});
|
|
@@ -26,18 +26,33 @@ import { sessionManager } from "./session.js";
|
|
|
26
26
|
const sdkRoot = path.join(path.dirname(fileURLToPath(import.meta.url)), "..", "..");
|
|
27
27
|
const packageJson = JSON.parse(fs.readFileSync(path.join(sdkRoot, "package.json"), "utf-8"));
|
|
28
28
|
const version = packageJson.version || "1.0.0";
|
|
29
|
-
// Derive release channel from package version
|
|
29
|
+
// Derive release channel and infrastructure environment from package version
|
|
30
30
|
import semver from "semver";
|
|
31
|
-
const
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
const CHANNEL_TO_ENV = {
|
|
32
|
+
dev: "dev",
|
|
33
|
+
test: "staging",
|
|
34
|
+
canary: "production",
|
|
35
|
+
stable: "production",
|
|
36
|
+
};
|
|
37
|
+
const VALID_CHANNELS = new Set(Object.keys(CHANNEL_TO_ENV));
|
|
38
|
+
const VALID_ENVS = new Set(["dev", "staging", "production"]);
|
|
39
|
+
function resolveChannel(ver) {
|
|
40
|
+
if (process.env.TD_CHANNEL && VALID_CHANNELS.has(process.env.TD_CHANNEL))
|
|
34
41
|
return process.env.TD_CHANNEL;
|
|
42
|
+
if (process.env.TD_ENV && VALID_CHANNELS.has(process.env.TD_ENV))
|
|
43
|
+
return process.env.TD_ENV;
|
|
35
44
|
const pre = semver.prerelease(ver);
|
|
36
|
-
if (pre && pre.length > 0 &&
|
|
45
|
+
if (pre && pre.length > 0 && VALID_CHANNELS.has(String(pre[0])))
|
|
37
46
|
return String(pre[0]);
|
|
38
|
-
return "
|
|
47
|
+
return "stable";
|
|
48
|
+
}
|
|
49
|
+
function resolveSentryEnvironment(ver) {
|
|
50
|
+
if (process.env.TD_ENV && VALID_ENVS.has(process.env.TD_ENV))
|
|
51
|
+
return process.env.TD_ENV;
|
|
52
|
+
return CHANNEL_TO_ENV[resolveChannel(ver)] || "production";
|
|
39
53
|
}
|
|
40
|
-
const
|
|
54
|
+
const activeChannel = resolveChannel(version);
|
|
55
|
+
const sentryEnvironment = resolveSentryEnvironment(version);
|
|
41
56
|
const isSentryEnabled = () => {
|
|
42
57
|
if (process.env.TD_TELEMETRY === "false") {
|
|
43
58
|
return false;
|
|
@@ -49,7 +64,7 @@ if (isSentryEnabled()) {
|
|
|
49
64
|
Sentry.init({
|
|
50
65
|
dsn: process.env.SENTRY_DSN ||
|
|
51
66
|
"https://452bd5a00dbd83a38ee8813e11c57694@o4510262629236736.ingest.us.sentry.io/4510480443637760",
|
|
52
|
-
environment:
|
|
67
|
+
environment: sentryEnvironment,
|
|
53
68
|
release: version,
|
|
54
69
|
sampleRate: 1.0,
|
|
55
70
|
tracesSampleRate: 1.0,
|
|
@@ -57,6 +72,7 @@ if (isSentryEnabled()) {
|
|
|
57
72
|
integrations: [Sentry.httpIntegration(), Sentry.nodeContextIntegration()],
|
|
58
73
|
initialScope: {
|
|
59
74
|
tags: {
|
|
75
|
+
channel: activeChannel,
|
|
60
76
|
platform: os.platform(),
|
|
61
77
|
arch: os.arch(),
|
|
62
78
|
nodeVersion: process.version,
|
|
@@ -84,6 +100,10 @@ if (isSentryEnabled()) {
|
|
|
84
100
|
if (error && typeof error === "object" && "name" in error && error.name === "TestFailure") {
|
|
85
101
|
return null;
|
|
86
102
|
}
|
|
103
|
+
// Filter out ElementNotFoundError - expected test outcome, not a crash
|
|
104
|
+
if (error && typeof error === "object" && "name" in error && error.name === "ElementNotFoundError") {
|
|
105
|
+
return null;
|
|
106
|
+
}
|
|
87
107
|
return event;
|
|
88
108
|
},
|
|
89
109
|
});
|
package/mcp-server/src/server.ts
CHANGED
|
@@ -34,16 +34,33 @@ const sdkRoot = path.join(path.dirname(fileURLToPath(import.meta.url)), "..", ".
|
|
|
34
34
|
const packageJson = JSON.parse(fs.readFileSync(path.join(sdkRoot, "package.json"), "utf-8"));
|
|
35
35
|
const version = packageJson.version || "1.0.0";
|
|
36
36
|
|
|
37
|
-
// Derive release channel from package version
|
|
37
|
+
// Derive release channel and infrastructure environment from package version
|
|
38
38
|
import semver from "semver";
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
39
|
+
|
|
40
|
+
const CHANNEL_TO_ENV: Record<string, string> = {
|
|
41
|
+
dev: "dev",
|
|
42
|
+
test: "staging",
|
|
43
|
+
canary: "production",
|
|
44
|
+
stable: "production",
|
|
45
|
+
};
|
|
46
|
+
const VALID_CHANNELS = new Set(Object.keys(CHANNEL_TO_ENV));
|
|
47
|
+
const VALID_ENVS = new Set(["dev", "staging", "production"]);
|
|
48
|
+
|
|
49
|
+
function resolveChannel(ver: string): string {
|
|
50
|
+
if (process.env.TD_CHANNEL && VALID_CHANNELS.has(process.env.TD_CHANNEL)) return process.env.TD_CHANNEL;
|
|
51
|
+
if (process.env.TD_ENV && VALID_CHANNELS.has(process.env.TD_ENV)) return process.env.TD_ENV;
|
|
42
52
|
const pre = semver.prerelease(ver);
|
|
43
|
-
if (pre && pre.length > 0 &&
|
|
44
|
-
return "
|
|
53
|
+
if (pre && pre.length > 0 && VALID_CHANNELS.has(String(pre[0]))) return String(pre[0]);
|
|
54
|
+
return "stable";
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
function resolveSentryEnvironment(ver: string): string {
|
|
58
|
+
if (process.env.TD_ENV && VALID_ENVS.has(process.env.TD_ENV)) return process.env.TD_ENV;
|
|
59
|
+
return CHANNEL_TO_ENV[resolveChannel(ver)] || "production";
|
|
45
60
|
}
|
|
46
|
-
|
|
61
|
+
|
|
62
|
+
const activeChannel = resolveChannel(version);
|
|
63
|
+
const sentryEnvironment = resolveSentryEnvironment(version);
|
|
47
64
|
|
|
48
65
|
const isSentryEnabled = () => {
|
|
49
66
|
if (process.env.TD_TELEMETRY === "false") {
|
|
@@ -58,7 +75,7 @@ if (isSentryEnabled()) {
|
|
|
58
75
|
dsn:
|
|
59
76
|
process.env.SENTRY_DSN ||
|
|
60
77
|
"https://452bd5a00dbd83a38ee8813e11c57694@o4510262629236736.ingest.us.sentry.io/4510480443637760",
|
|
61
|
-
environment:
|
|
78
|
+
environment: sentryEnvironment,
|
|
62
79
|
release: version,
|
|
63
80
|
sampleRate: 1.0,
|
|
64
81
|
tracesSampleRate: 1.0,
|
|
@@ -66,6 +83,7 @@ if (isSentryEnabled()) {
|
|
|
66
83
|
integrations: [Sentry.httpIntegration(), Sentry.nodeContextIntegration()],
|
|
67
84
|
initialScope: {
|
|
68
85
|
tags: {
|
|
86
|
+
channel: activeChannel,
|
|
69
87
|
platform: os.platform(),
|
|
70
88
|
arch: os.arch(),
|
|
71
89
|
nodeVersion: process.version,
|
|
@@ -99,6 +117,11 @@ if (isSentryEnabled()) {
|
|
|
99
117
|
if (error && typeof error === "object" && "name" in error && (error as { name: string }).name === "TestFailure") {
|
|
100
118
|
return null;
|
|
101
119
|
}
|
|
120
|
+
|
|
121
|
+
// Filter out ElementNotFoundError - expected test outcome, not a crash
|
|
122
|
+
if (error && typeof error === "object" && "name" in error && (error as { name: string }).name === "ElementNotFoundError") {
|
|
123
|
+
return null;
|
|
124
|
+
}
|
|
102
125
|
|
|
103
126
|
return event;
|
|
104
127
|
},
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "testdriverai",
|
|
3
|
-
"version": "7.
|
|
3
|
+
"version": "7.9.0-test.1",
|
|
4
4
|
"description": "Next generation autonomous AI agent for end-to-end testing of web & desktop",
|
|
5
5
|
"main": "sdk.js",
|
|
6
6
|
"types": "sdk.d.ts",
|
|
@@ -116,6 +116,7 @@
|
|
|
116
116
|
},
|
|
117
117
|
"overrides": {
|
|
118
118
|
"glob": "^11.0.1",
|
|
119
|
+
"obug": "2.1.1",
|
|
119
120
|
"rimraf": "^5.0.10"
|
|
120
121
|
},
|
|
121
122
|
"peerDependencies": {
|
package/sdk.d.ts
CHANGED
|
@@ -273,6 +273,8 @@ export interface TestDriverOptions {
|
|
|
273
273
|
sandboxAmi?: string;
|
|
274
274
|
/** EC2 instance type for sandbox (e.g., 'i3.metal') */
|
|
275
275
|
sandboxInstance?: string;
|
|
276
|
+
/** E2B template ID to use when creating the sandbox (e.g., 'my-template-id') */
|
|
277
|
+
e2bTemplateId?: string;
|
|
276
278
|
/** Cache key for element finding operations. If provided, enables caching tied to this key */
|
|
277
279
|
cacheKey?: string;
|
|
278
280
|
/** Reconnect to the last used sandbox instead of creating a new one. When true, provision methods (chrome, vscode, installer, etc.) will be skipped since the application is already running. Throws error if no previous sandbox exists. */
|
|
@@ -327,6 +329,8 @@ export interface ConnectOptions {
|
|
|
327
329
|
sandboxAmi?: string;
|
|
328
330
|
/** EC2 instance type for sandbox (e.g., 'i3.metal') */
|
|
329
331
|
sandboxInstance?: string;
|
|
332
|
+
/** E2B template ID to use when creating the sandbox (e.g., 'my-template-id') */
|
|
333
|
+
e2bTemplateId?: string;
|
|
330
334
|
/** Operating system for the sandbox (default: 'linux') */
|
|
331
335
|
os?: "windows" | "linux";
|
|
332
336
|
/**
|
package/sdk.js
CHANGED
|
@@ -481,7 +481,7 @@ class Element {
|
|
|
481
481
|
let cacheKey = null;
|
|
482
482
|
let cacheThreshold = null;
|
|
483
483
|
let perCommandThresholds = null; // Per-command { screen, element } override
|
|
484
|
-
let zoom =
|
|
484
|
+
let zoom = true; // Default to enabled
|
|
485
485
|
let perCommandAi = null; // Per-command AI config override
|
|
486
486
|
|
|
487
487
|
let minConfidence = null; // Minimum confidence threshold
|
|
@@ -494,8 +494,8 @@ class Element {
|
|
|
494
494
|
// New: options is an object with cacheKey and/or cacheThreshold
|
|
495
495
|
cacheKey = options.cacheKey || null;
|
|
496
496
|
cacheThreshold = options.cacheThreshold ?? null;
|
|
497
|
-
// zoom defaults to
|
|
498
|
-
zoom = options.zoom
|
|
497
|
+
// zoom defaults to true unless explicitly set to false
|
|
498
|
+
zoom = options.zoom !== false;
|
|
499
499
|
// Minimum confidence threshold: fail find if AI confidence is below this value
|
|
500
500
|
minConfidence = options.confidence ?? null;
|
|
501
501
|
// Element type hint for prompt wrapping
|
|
@@ -568,7 +568,7 @@ class Element {
|
|
|
568
568
|
cacheKey: cacheKey,
|
|
569
569
|
os: this.sdk.os,
|
|
570
570
|
resolution: this.sdk.resolution,
|
|
571
|
-
zoom: zoom,
|
|
571
|
+
zoom: zoom === true ? 1 : zoom === false ? 0 : zoom,
|
|
572
572
|
confidence: minConfidence,
|
|
573
573
|
type: elementType,
|
|
574
574
|
ai: {
|
|
@@ -623,6 +623,11 @@ class Element {
|
|
|
623
623
|
|
|
624
624
|
// Track find interaction once at the end (fire-and-forget, don't block)
|
|
625
625
|
const sessionId = this.sdk.getSessionId();
|
|
626
|
+
const findCacheHit = response?.cacheHit || response?.cache_hit || response?.cached || false;
|
|
627
|
+
// Increment local interaction counters
|
|
628
|
+
this.sdk._interactionStats.total++;
|
|
629
|
+
this.sdk._interactionStats.byType.find = (this.sdk._interactionStats.byType.find || 0) + 1;
|
|
630
|
+
if (findCacheHit) this.sdk._interactionStats.cached++;
|
|
626
631
|
if (sessionId && this.sdk.apiClient) {
|
|
627
632
|
this.sdk.apiClient
|
|
628
633
|
.req("interaction/track", {
|
|
@@ -632,11 +637,7 @@ class Element {
|
|
|
632
637
|
timestamp: absoluteTimestamp, // Absolute epoch timestamp - frontend calculates relative using clientStartDate
|
|
633
638
|
success: this._found,
|
|
634
639
|
error: findError,
|
|
635
|
-
cacheHit:
|
|
636
|
-
response?.cacheHit ||
|
|
637
|
-
response?.cache_hit ||
|
|
638
|
-
response?.cached ||
|
|
639
|
-
false,
|
|
640
|
+
cacheHit: findCacheHit,
|
|
640
641
|
selector: response?.selector,
|
|
641
642
|
selectorUsed: !!response?.selector,
|
|
642
643
|
confidence: response?.confidence ?? null,
|
|
@@ -1498,6 +1499,7 @@ class TestDriverSDK {
|
|
|
1498
1499
|
// Store sandbox configuration options
|
|
1499
1500
|
this.sandboxAmi = options.sandboxAmi || null;
|
|
1500
1501
|
this.sandboxInstance = options.sandboxInstance || null;
|
|
1502
|
+
this.e2bTemplateId = options.e2bTemplateId || null;
|
|
1501
1503
|
|
|
1502
1504
|
// Store reconnect preference from options
|
|
1503
1505
|
this.reconnect =
|
|
@@ -1616,6 +1618,12 @@ class TestDriverSDK {
|
|
|
1616
1618
|
// Uploaded to S3 at cleanup so they can be displayed alongside dashcam replays.
|
|
1617
1619
|
this._logBuffer = [];
|
|
1618
1620
|
|
|
1621
|
+
// API version discovered by _logEnvironmentInfo()
|
|
1622
|
+
this._apiVersion = null;
|
|
1623
|
+
|
|
1624
|
+
// Local interaction counters — incremented at each interaction/track call site
|
|
1625
|
+
this._interactionStats = { total: 0, cached: 0, byType: {} };
|
|
1626
|
+
|
|
1619
1627
|
// Set up event listeners once (they live for the lifetime of the SDK instance)
|
|
1620
1628
|
this._setupLogging();
|
|
1621
1629
|
|
|
@@ -2716,6 +2724,7 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
2716
2724
|
* @param {string} options.ip - Direct IP address to connect to
|
|
2717
2725
|
* @param {string} options.sandboxAmi - AMI to use for the sandbox
|
|
2718
2726
|
* @param {string} options.sandboxInstance - Instance type for the sandbox
|
|
2727
|
+
* @param {string} options.e2bTemplateId - E2B template ID to use when creating the sandbox
|
|
2719
2728
|
* @param {string} options.os - Operating system for the sandbox (windows or linux)
|
|
2720
2729
|
* @param {boolean} options.reuseConnection - Reuse recent connection if available (default: true)
|
|
2721
2730
|
* @returns {Promise<Object>} Sandbox instance details
|
|
@@ -2804,6 +2813,12 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
2804
2813
|
} else if (this.sandboxInstance) {
|
|
2805
2814
|
this.agent.sandboxInstance = this.sandboxInstance;
|
|
2806
2815
|
}
|
|
2816
|
+
// Use e2bTemplateId from connectOptions if provided, otherwise fall back to constructor value
|
|
2817
|
+
if (connectOptions.e2bTemplateId !== undefined) {
|
|
2818
|
+
this.agent.e2bTemplateId = connectOptions.e2bTemplateId;
|
|
2819
|
+
} else if (this.e2bTemplateId) {
|
|
2820
|
+
this.agent.e2bTemplateId = this.e2bTemplateId;
|
|
2821
|
+
}
|
|
2807
2822
|
// Use os from connectOptions if provided, otherwise fall back to this.os
|
|
2808
2823
|
if (connectOptions.os !== undefined) {
|
|
2809
2824
|
this.agent.sandboxOs = connectOptions.os;
|
|
@@ -3193,6 +3208,11 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
3193
3208
|
|
|
3194
3209
|
// Track successful findAll interaction (fire-and-forget, don't block)
|
|
3195
3210
|
const sessionId = this.getSessionId();
|
|
3211
|
+
const findAllCacheHit = response.cached || false;
|
|
3212
|
+
// Increment local interaction counters
|
|
3213
|
+
this._interactionStats.total++;
|
|
3214
|
+
this._interactionStats.byType.findAll = (this._interactionStats.byType.findAll || 0) + 1;
|
|
3215
|
+
if (findAllCacheHit) this._interactionStats.cached++;
|
|
3196
3216
|
if (sessionId && this.apiClient) {
|
|
3197
3217
|
this.apiClient
|
|
3198
3218
|
.req("interaction/track", {
|
|
@@ -3202,7 +3222,7 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
3202
3222
|
timestamp: absoluteTimestamp, // Absolute epoch timestamp - frontend calculates relative using clientStartDate
|
|
3203
3223
|
success: true,
|
|
3204
3224
|
input: { count: elements.length },
|
|
3205
|
-
cacheHit:
|
|
3225
|
+
cacheHit: findAllCacheHit,
|
|
3206
3226
|
selector: response.selector,
|
|
3207
3227
|
selectorUsed: !!response.selector,
|
|
3208
3228
|
screenshotUrl: response.screenshotKey ?? null,
|
|
@@ -3248,6 +3268,11 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
3248
3268
|
|
|
3249
3269
|
// No elements found - track interaction (fire-and-forget, don't block)
|
|
3250
3270
|
const sessionId = this.getSessionId();
|
|
3271
|
+
const noResultCacheHit = response?.cached || false;
|
|
3272
|
+
// Increment local interaction counters
|
|
3273
|
+
this._interactionStats.total++;
|
|
3274
|
+
this._interactionStats.byType.findAll = (this._interactionStats.byType.findAll || 0) + 1;
|
|
3275
|
+
if (noResultCacheHit) this._interactionStats.cached++;
|
|
3251
3276
|
if (sessionId && this.apiClient) {
|
|
3252
3277
|
this.apiClient
|
|
3253
3278
|
.req("interaction/track", {
|
|
@@ -3258,7 +3283,7 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
3258
3283
|
success: false,
|
|
3259
3284
|
error: "No elements found",
|
|
3260
3285
|
input: { count: 0 },
|
|
3261
|
-
cacheHit:
|
|
3286
|
+
cacheHit: noResultCacheHit,
|
|
3262
3287
|
selector: response?.selector,
|
|
3263
3288
|
selectorUsed: !!response?.selector,
|
|
3264
3289
|
screenshotUrl: response?.screenshotKey ?? null,
|
|
@@ -3292,6 +3317,9 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
3292
3317
|
|
|
3293
3318
|
// Track findAll error interaction (fire-and-forget, don't block)
|
|
3294
3319
|
const sessionId = this.getSessionId();
|
|
3320
|
+
// Increment local interaction counters
|
|
3321
|
+
this._interactionStats.total++;
|
|
3322
|
+
this._interactionStats.byType.findAll = (this._interactionStats.byType.findAll || 0) + 1;
|
|
3295
3323
|
if (sessionId && this.apiClient) {
|
|
3296
3324
|
this.apiClient
|
|
3297
3325
|
.req("interaction/track", {
|
|
@@ -3817,7 +3845,7 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
3817
3845
|
const apiRoot = this.config?.TD_API_ROOT || 'unknown';
|
|
3818
3846
|
const apiKey = this.config?.TD_API_KEY || '';
|
|
3819
3847
|
const maskedKey = apiKey.length > 4 ? '***' + apiKey.slice(-4) : '(not set)';
|
|
3820
|
-
const env = process.env.TD_ENV || 'unknown';
|
|
3848
|
+
const env = process.env.TD_CHANNEL || process.env.TD_ENV || 'unknown';
|
|
3821
3849
|
const os = this.agent?.options?.os || process.env.TD_OS || 'linux';
|
|
3822
3850
|
const sdkVersion = require('./package.json').version;
|
|
3823
3851
|
|
|
@@ -3843,6 +3871,8 @@ CAPTCHA_SOLVER_EOF`,
|
|
|
3843
3871
|
res.on('end', () => {
|
|
3844
3872
|
try {
|
|
3845
3873
|
const info = JSON.parse(data);
|
|
3874
|
+
// Persist API version for test result metadata
|
|
3875
|
+
this._apiVersion = info.version || null;
|
|
3846
3876
|
const commit = info.commit || 'unknown';
|
|
3847
3877
|
const shortCommit = commit.substring(0, 7);
|
|
3848
3878
|
const commitUrl = commit !== 'unknown'
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
#!/usr/bin/env bash
set -euo pipefail

# Pack the local runner with `npm pack`, upload the tarball to S3, and install
# it on an instance by sending an AWS-RunPowerShellScript SSM command that
# downloads the tarball from a presigned URL, unpacks it over the installed
# runner, and restarts the RunTestDriverAgent scheduled task.
#
# Usage: ./install-dev-runner.sh <instance-id>
# Environment:
#   AWS_REGION  AWS region for all aws CLI calls (default: us-east-2)
# Requires: npm, aws, openssl, python3, jq
INSTANCE_ID="${1:?Usage: $0 <instance-id>}"
AWS_REGION="${AWS_REGION:-us-east-2}"

RUNNER_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../runner" && pwd)"

# Private scratch directory for the tarball and the SSM params file.
# Deliberately NOT named TMPDIR: that variable is read by mktemp and other
# tools, so overwriting it and then deleting the directory breaks every later
# temp-file creation when TMPDIR is exported in the caller's environment.
WORK_DIR="$(mktemp -d)"
# Remove the scratch directory on every exit path, including `set -e` aborts.
trap 'rm -rf "$WORK_DIR"' EXIT

echo "Packing local runner..."
pushd "$RUNNER_DIR" > /dev/null
npm pack --pack-destination "$WORK_DIR" > /dev/null 2>&1
popd > /dev/null

# Collect the packed tarball with a glob instead of parsing `ls` output.
TARBALLS=("$WORK_DIR"/*.tgz)
TARBALL="${TARBALLS[0]}"
if [[ ! -f "$TARBALL" ]]; then
  echo "npm pack did not produce a tarball in $WORK_DIR" >&2
  exit 1
fi
echo "Tarball: $TARBALL"

echo "Uploading to S3..."
# Timestamp plus random suffix keeps each upload under a unique key.
S3_KEY="runner-dev/$(date +%s)-$(openssl rand -hex 4)/runner.tgz"
aws s3 cp "$TARBALL" "s3://v7-transfer/${S3_KEY}" --region "$AWS_REGION" > /dev/null
# Presigned URL is valid for 15 minutes; the instance downloads through it.
DOWNLOAD_URL=$(aws s3 presign "s3://v7-transfer/${S3_KEY}" --expires-in 900 --region "$AWS_REGION")

echo "Creating SSM params file..."

# The params file embeds the presigned URL, so keep it inside the private
# scratch directory rather than at a fixed, world-predictable /tmp path.
PARAMS_FILE="$WORK_DIR/ssm-install-params.json"

# Generate the JSON with Python so the PowerShell command strings are escaped
# correctly. The script is fed on stdin; the URL arrives as sys.argv[1].
python3 - "$DOWNLOAD_URL" > "$PARAMS_FILE" << 'PYEOF'
import json
import sys

url = sys.argv[1]

commands = [
    "Write-Host '=== Stopping runner ==='",
    "Stop-ScheduledTask -TaskName RunTestDriverAgent -ErrorAction SilentlyContinue",
    "Stop-Process -Name node -Force -ErrorAction SilentlyContinue",
    "Start-Sleep -Seconds 2",
    "Set-Location 'C:\\testdriver\\sandbox-agent'",
    "$tarball = 'C:\\Windows\\Temp\\runner-dev.tgz'",
    f"Invoke-WebRequest -Uri '{url}' -OutFile $tarball",
    "Write-Host 'Tarball size:'; (Get-Item $tarball).Length",
    "Remove-Item -Path lib -Recurse -Force -ErrorAction SilentlyContinue",
    "tar -xzf $tarball --strip-components=1 -C .",
    "Get-Content 'package.json' | ConvertFrom-Json | Select-Object -ExpandProperty version",
    "Write-Host '=== Starting runner ==='",
    "Start-ScheduledTask -TaskName RunTestDriverAgent",
    "Start-Sleep -Seconds 3",
    "Get-Content 'C:\\testdriver\\log.txt' -Tail 20"
]

params = {"commands": commands}
print(json.dumps(params))
PYEOF

echo "Sending SSM command..."
CMD_JSON=$(aws ssm send-command \
  --region "$AWS_REGION" \
  --instance-ids "$INSTANCE_ID" \
  --document-name "AWS-RunPowerShellScript" \
  --parameters "file://${PARAMS_FILE}" \
  --output json)

COMMAND_ID=$(echo "$CMD_JSON" | jq -r '.Command.CommandId')
echo "Command ID: $COMMAND_ID"

echo "Waiting for completion..."
# Best-effort wait: a non-zero status here must not stop the script, because
# the command output below is still wanted when the remote command failed.
aws ssm wait command-executed --region "$AWS_REGION" --command-id "$COMMAND_ID" --instance-id "$INSTANCE_ID" || true

echo "Getting output..."
aws ssm get-command-invocation \
  --region "$AWS_REGION" \
  --command-id "$COMMAND_ID" \
  --instance-id "$INSTANCE_ID" \
  --query 'StandardOutputContent' \
  --output text
|