@testdriverai/runner 7.8.0-canary.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js ADDED
@@ -0,0 +1,556 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * presence-runner.js — Presence-based runner for TestDriver
4
+ *
5
+ * This runner registers itself in a team's Ably presence channel and waits
6
+ * to be claimed by an SDK session. This enables:
7
+ * - Discovery: SDK can see available runners
8
+ * - Claiming: SDK claims a specific runner for a session
9
+ * - Multi-runner: Multiple runners can be available for a team
10
+ *
11
+ * Flow:
12
+ * 1. Start with TD_API_KEY (required)
13
+ * 2. Call /api/v7/runner/register to get:
14
+ * - Ably token with presence capability
15
+ * - Runner channel name
16
+ * - Team ID
17
+ * 3. Enter presence on the runner channel
18
+ * 4. Listen for 'claim' messages
19
+ * 5. When claimed:
20
+ * - Exit presence (or mark as busy)
21
+ * - Connect to session channel
22
+ * - Handle commands until session ends
23
+ * 6. Re-enter presence when session ends (or exit)
24
+ *
25
+ * Environment variables:
26
+ * TD_API_KEY — Required: Team API key
27
+ * TD_API_ROOT — Optional: API URL (default: https://api.testdriver.ai)
28
+ * TD_RUNNER_ID — Optional: Runner ID (auto-generated if not set)
29
+ * TD_RUNNER_OS — Optional: OS capability (default: auto-detected)
30
+ * TD_RUNNER_SINGLE — Optional: Exit after one session (default: false)
31
+ */
32
+ require('../shared/load-env');
33
+ const os = require('os');
34
+ const fs = require('fs');
35
+ const Ably = require('ably');
36
+ const environments = require('../shared/environments.json');
37
+
38
+ const { Automation } = require('./lib/automation');
39
+ const { AblyService } = require('./lib/ably-service');
40
+
41
/**
 * Resolve the version string this runner reports about itself.
 *
 * A non-empty RUNNER_VERSION environment variable takes precedence; otherwise
 * the `version` field of the adjacent package.json is used.
 *
 * @returns {string|null} The version, or null when package.json cannot be loaded.
 */
function getLocalVersion() {
  const override = process.env.RUNNER_VERSION;
  if (override) {
    return override;
  }

  try {
    const { version } = require('./package.json');
    return version;
  } catch {
    // package.json is missing or unreadable — the version is simply unknown.
    return null;
  }
}
51
+
52
// Platform flags, derived once from process.platform.
const [IS_WINDOWS, IS_LINUX, IS_MACOS] = ['win32', 'linux', 'darwin'].map(
  (platformName) => process.platform === platformName,
);

// ─── Configuration ───────────────────────────────────────────────────────────

// Team API key; the runner refuses to start without it (see the guard below).
const API_KEY = process.env.TD_API_KEY;

// Named environment (TD_ENV) selects an entry of environments.json; unknown
// names fall back to the "stable" entry.
const envName = process.env.TD_ENV || 'stable';
const envConfig = environments[envName] || environments.stable;

// API root: TD_API_ROOT, then API_ROOT, then the selected environment's apiRoot.
const API_ROOT = process.env.TD_API_ROOT || process.env.API_ROOT || envConfig.apiRoot;

// Optional runner ID; when unset it is passed to the API as undefined.
const RUNNER_ID = process.env.TD_RUNNER_ID;

// Only the exact string "true" enables single-session mode.
const SINGLE_SESSION = process.env.TD_RUNNER_SINGLE === 'true';

if (!API_KEY) {
  [
    'ERROR: TD_API_KEY environment variable is required',
    'Get your API key at: https://console.testdriver.ai/team',
  ].forEach((message) => console.error(message));
  process.exit(1);
}
70
+
71
+ // ─── Logging ─────────────────────────────────────────────────────────────────
72
+
73
// Fixed per-platform location of the runner's log file.
const logFile = IS_WINDOWS
  ? 'C:\\Windows\\Temp\\testdriver-runner.log'
  : '/tmp/testdriver-runner.log';

/**
 * Print a timestamped message to stdout and append the same line to logFile.
 *
 * @param {*} msg - Message to record; interpolated into the log line.
 */
function log(msg) {
  const timestamp = new Date().toISOString();
  const entry = `[${timestamp}] ${msg}`;

  console.log(entry);

  try {
    fs.appendFileSync(logFile, `${entry}\n`);
  } catch {
    // Best effort: a failed file write must never interrupt the runner.
  }
}
84
+
85
+ // ─── Runner capabilities ─────────────────────────────────────────────────────
86
+
87
/**
 * Collect a snapshot of this machine's capabilities for registration.
 *
 * The reported OS is TD_RUNNER_OS when set, otherwise process.platform.
 * Memory is total system memory rounded to whole gibibytes and suffixed "GB".
 *
 * @returns {{os: string, arch: string, hostname: string, nodeVersion: string,
 *   ip: string, cpus: number, memory: string, noVncPort: (string|null),
 *   vncUrl: (string|null)}}
 */
function getCapabilities() {
  const BYTES_PER_GIB = 1024 * 1024 * 1024;
  const detectedPort = detectNoVncPort();
  const totalGib = Math.round(os.totalmem() / BYTES_PER_GIB);

  const capabilities = {
    os: process.env.TD_RUNNER_OS || process.platform,
    arch: os.arch(),
    hostname: os.hostname(),
    nodeVersion: process.version,
    ip: getLocalIP(),
    cpus: os.cpus().length,
    memory: `${totalGib}GB`,
    noVncPort: detectedPort,
    vncUrl: buildVncUrl(detectedPort),
  };

  return capabilities;
}
101
+
102
/**
 * Work out the URL under which this runner's noVNC page can be reached.
 *
 * Resolution order:
 *   1. TD_VNC_URL — explicit override, returned even when no port was detected.
 *   2. E2B_SANDBOX_ID — https://{port}-{sandboxId}.e2b.dev/vnc_lite.html
 *   3. E2B heuristic — a 169.254.x.x local address combined with a hostname
 *      that contains no dot uses the hostname in place of the sandbox ID.
 *   4. Fallback — http://{localIP}:{port}/vnc_lite.html
 *
 * @param {string|number|null} noVncPort - Detected noVNC port, if any.
 * @returns {string|null} The URL, or null when there is no override and no port.
 */
function buildVncUrl(noVncPort) {
  const { TD_VNC_URL: explicitUrl, E2B_SANDBOX_ID: sandboxId } = process.env;

  if (explicitUrl) {
    return explicitUrl;
  }
  if (!noVncPort) {
    return null;
  }

  const e2bUrlFor = (id) => `https://${noVncPort}-${id}.e2b.dev/vnc_lite.html`;

  if (sandboxId) {
    return e2bUrlFor(sandboxId);
  }

  // No explicit sandbox ID: fall back to the network/hostname heuristic.
  const machineName = os.hostname();
  const localAddress = getLocalIP();
  const hasLinkLocalAddress = Boolean(localAddress) && localAddress.startsWith('169.254.');
  const hasBareHostname = Boolean(machineName) && !/\./.test(machineName);

  if (hasLinkLocalAddress && hasBareHostname) {
    return e2bUrlFor(machineName);
  }

  // Direct access by IP (private networks, Docker, hosts with a public IP, ...).
  return `http://${localAddress}:${noVncPort}/vnc_lite.html`;
}
138
+
139
/**
 * Find the first candidate port that appears as a listening socket in the
 * output of a socket-listing command.
 *
 * Matching is done row by row and the port must be followed by a non-digit,
 * so a socket on 60801 is never mistaken for 6080.
 *
 * @param {string} output - Raw stdout of netstat / ss / lsof.
 * @param {string[]} ports - Candidate ports, in priority order.
 * @param {boolean} isWindowsNetstat - True when `output` is `netstat -an`
 *   output, which also lists non-listening sockets: the row's local-address
 *   column must end in the port and its state column must be LISTENING.
 * @returns {string|null} The first matching candidate port, or null.
 */
function findListeningPort(output, ports, isWindowsNetstat) {
  const rows = String(output).split(/\r?\n/);

  for (const port of ports) {
    const portAtBoundary = new RegExp(`:${port}(?!\\d)`);
    const isListening = rows.some((row) => {
      if (!isWindowsNetstat) {
        // ss -l / lsof -sTCP:LISTEN only print listening sockets.
        return portAtBoundary.test(row);
      }
      // netstat columns: Proto, Local Address, Foreign Address, State.
      const [, localAddress = '', , state] = row.trim().split(/\s+/);
      return state === 'LISTENING' && localAddress.endsWith(`:${port}`);
    });
    if (isListening) {
      return port;
    }
  }

  return null;
}

/**
 * Detect the noVNC websocket proxy port.
 *
 * Detection strategy, in order:
 *   1. TD_NOVNC_PORT env var — explicit override, returned as-is.
 *   2. Windows: `netstat -an` rows that are LISTENING on 6080 or 6081.
 *   3. Linux/macOS: `ss -tlnp` (or `lsof` when ss fails) rows for 6080 or 6081.
 *   4. Linux/macOS: a running novnc_proxy/websockify process; the port is read
 *      from its command line (`--listen N` or `:N`), defaulting to 6080.
 *
 * Every external command is limited to 5 seconds and any failure is treated
 * as "not detected".
 *
 * @returns {string|null} The port as a string, or null if noVNC was not found.
 */
function detectNoVncPort() {
  // Allow explicit override
  if (process.env.TD_NOVNC_PORT) {
    return process.env.TD_NOVNC_PORT;
  }

  const { execSync } = require('child_process');
  const candidatePorts = ['6080', '6081'];
  const execOptions = { encoding: 'utf8', timeout: 5000 };

  try {
    if (IS_WINDOWS) {
      const output = execSync('netstat -an 2>nul', execOptions);
      return findListeningPort(output, candidatePorts, true);
    }

    // Linux/macOS: try ss first (faster), then lsof
    try {
      const output = execSync(
        'ss -tlnp 2>/dev/null || lsof -iTCP -sTCP:LISTEN -P -n 2>/dev/null',
        execOptions,
      );
      const port = findListeningPort(output, candidatePorts, false);
      if (port) {
        return port;
      }
    } catch {
      // Neither tool produced output — fall through to the process check.
    }

    // Fallback: check if a noVNC proxy process is running
    try {
      const output = execSync('pgrep -af "novnc_proxy|websockify" 2>/dev/null', execOptions);
      // Depending on the shell, `pgrep -f` may also list the `sh -c` wrapper
      // that carries this very pattern; drop such rows so they are not
      // mistaken for a running proxy.
      const commandLines = output
        .split('\n')
        .filter((row) => row.trim() && !row.includes('pgrep'))
        .join('\n');

      // Try to extract port from command line args (e.g. --listen 6080)
      const match = commandLines.match(/(?:--listen\s+|:)(\d{4,5})/);
      if (match) {
        return match[1];
      }
      // Process is running but can't determine port — assume default
      if (commandLines) {
        return '6080';
      }
    } catch {
      // pgrep unavailable or no matching process — nothing detected.
    }
  } catch {
    // Detection is best effort; a failure simply means "no noVNC found".
  }

  return null;
}
199
+
200
/**
 * Return the first non-internal IPv4 address of this machine.
 *
 * Interfaces are scanned in the order os.networkInterfaces() reports them.
 * The address family is accepted both as the string 'IPv4' and as the number
 * 4, because Node 18.0–18.3 reported `family` numerically.
 *
 * @returns {string} The address, or '127.0.0.1' when no external IPv4
 *   interface exists.
 */
function getLocalIP() {
  const isIPv4 = (family) => family === 'IPv4' || family === 4;

  for (const addresses of Object.values(os.networkInterfaces())) {
    for (const iface of addresses ?? []) {
      if (isIPv4(iface.family) && !iface.internal) {
        return iface.address;
      }
    }
  }

  return '127.0.0.1';
}
211
+
212
+ // ─── API registration ────────────────────────────────────────────────────────
213
+
214
/**
 * Register this runner with the TestDriver API.
 *
 * POSTs the API key, the optional runner ID, the machine capabilities and the
 * package version to `${API_ROOT}/api/v7/runner/register`.
 *
 * @returns {Promise<object>} The parsed JSON response body.
 * @throws {Error} When the HTTP status is not OK, or when the response body
 *   has a falsy `success` field.
 */
async function registerRunner() {
  log(`Registering with API at ${API_ROOT}...`);

  const capabilities = getCapabilities();
  log(`Capabilities: ${JSON.stringify(capabilities)}`);

  const payload = {
    apiKey: API_KEY,
    runnerId: RUNNER_ID,
    capabilities,
    version: require('./package.json').version,
  };
  const endpoint = `${API_ROOT}/api/v7/runner/register`;

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    const errorBody = await response.text();
    throw new Error(`Registration failed (${response.status}): ${errorBody}`);
  }

  const result = await response.json();
  if (result.success) {
    return result;
  }
  throw new Error(`Registration failed: ${result.errorMessage}`);
}
243
+
244
+ // ─── Presence management ─────────────────────────────────────────────────────
245
+
246
/**
 * Runner that advertises itself on a presence channel and serves one claimed
 * session at a time.
 *
 * Lifecycle: start() registers with the API, connects to Ably, enters
 * presence as "available" and subscribes to 'claim' messages. A claim that
 * targets this runner is prepared, acknowledged with a 'claim-response', and
 * run until the session service reports 'disconnected' or 'error'. Afterwards
 * the runner either becomes available again or, in single-session mode,
 * shuts down.
 */
class PresenceRunner {
  constructor() {
    this.ably = null; // Ably.Realtime client used for presence and claims
    this.runnerChannel = null; // channel carrying presence and claim traffic
    this.runnerId = null; // assigned by the registration response
    this.teamId = null; // assigned by the registration response
    this.presenceData = null; // base presence payload from registration
    this.currentSession = null; // { sandboxId, channel } while a session is held
    this.shuttingDown = false;
    this.heartbeatInterval = null;
    this.updateInfo = null; // forwarded to AblyService; never set in this class
  }

  /**
   * Register, connect, enter presence and begin listening for claims.
   *
   * @returns {Promise<void>}
   * @throws {Error} When registration fails, or when the Ably connection
   *   fails or is not established within 30 seconds.
   */
  async start() {
    log('🚀 TestDriver Presence Runner starting...');

    // Register with API first to get runner ID and team info
    const registration = await registerRunner();
    this.runnerId = registration.runnerId;
    this.teamId = registration.teamId;
    this.presenceData = registration.presenceData;

    log(`Registered as runner: ${this.runnerId}`);
    log(`Team ID: ${this.teamId}`);
    log(`Runner channel: ${registration.ably.runnerChannel}`);

    // Connect to Ably
    // NOTE(review): the callback always hands back the token obtained at
    // registration; if Ably asks again after that token expires, the same
    // token is returned — confirm the token lifetime covers the runner's.
    this.ably = new Ably.Realtime({
      authCallback: (tokenParams, callback) => {
        callback(null, registration.ably.token);
      },
      clientId: `runner-${this.runnerId}`,
    });

    await this.waitForConnection(30000);

    log('Connected to Ably');

    // Monitor runner presence connection state
    this.ably.connection.on((stateChange) => {
      const { current, previous, reason, retryIn } = stateChange;
      const reasonMsg = reason ? (reason.message || reason.code || String(reason)) : undefined;
      log(`[ably] Presence connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}${retryIn ? ' (retryIn=' + retryIn + 'ms)' : ''}`);
    });

    // Get runner channel and enter presence
    this.runnerChannel = this.ably.channels.get(registration.ably.runnerChannel);

    await this.enterPresence();

    // Start heartbeat to update lastSeen for zombie detection
    this.startHeartbeat();

    // Listen for claim messages. handleClaim is async and nothing awaits the
    // subscriber, so failures are caught here instead of surfacing as an
    // unhandled promise rejection.
    this.runnerChannel.subscribe('claim', (msg) => {
      this.handleClaim(msg).catch((err) => {
        log(`❌ Unhandled error while processing claim: ${err?.message || err}`);
      });
    });

    log('✅ Runner ready — waiting for claims...');
    log(` Runner ID: ${this.runnerId}`);
    log(` IP: ${getLocalIP()}`);
  }

  /**
   * Wait until the Ably connection reports 'connected'.
   *
   * The timer and both listeners are released as soon as the promise settles
   * so nothing lingers after start-up.
   *
   * @param {number} timeoutMs - Maximum time to wait, in milliseconds.
   * @returns {Promise<void>}
   * @throws {Error} On a 'failed' connection state or on timeout.
   */
  waitForConnection(timeoutMs) {
    const { connection } = this.ably;

    return new Promise((resolve, reject) => {
      let timer = null;

      const release = () => {
        clearTimeout(timer);
        connection.off('connected', onConnected);
        connection.off('failed', onFailed);
      };
      const onConnected = () => {
        release();
        resolve();
      };
      const onFailed = (err) => {
        release();
        reject(new Error(`Ably connection failed: ${err?.reason?.message || 'unknown'}`));
      };

      connection.on('connected', onConnected);
      connection.on('failed', onFailed);
      timer = setTimeout(() => {
        release();
        reject(new Error('Ably connection timeout'));
      }, timeoutMs);
    });
  }

  /**
   * Enter presence on the runner channel with status "available".
   *
   * @returns {Promise<void>}
   */
  async enterPresence() {
    log('Entering presence...');
    await this.runnerChannel.presence.enter({
      ...this.presenceData,
      status: 'available',
      enteredAt: Date.now(),
      lastSeen: Date.now(),
      sandboxId: null,
    });
    log('Entered presence — runner is now available');
  }

  /**
   * Stop the heartbeat and leave presence. Failures are logged, not thrown.
   *
   * @returns {Promise<void>}
   */
  async leavePresence() {
    log('Leaving presence...');
    this.stopHeartbeat();
    try {
      await this.runnerChannel.presence.leave();
    } catch (err) {
      log(`Warning: Failed to leave presence: ${err.message}`);
    }
  }

  /**
   * Refresh the presence payload every 30 seconds with a new lastSeen
   * timestamp, so runners that are present but unresponsive can be detected.
   */
  startHeartbeat() {
    this.heartbeatInterval = setInterval(async () => {
      if (this.shuttingDown) return;
      try {
        await this.runnerChannel.presence.update({
          ...this.presenceData,
          status: this.currentSession ? 'busy' : 'available',
          sandboxId: this.currentSession?.sandboxId || null,
          lastSeen: Date.now(),
        });
      } catch (err) {
        log(`Warning: Heartbeat update failed: ${err.message}`);
      }
    }, 30000);
  }

  /** Cancel the heartbeat timer, if one is running. */
  stopHeartbeat() {
    if (this.heartbeatInterval) {
      clearInterval(this.heartbeatInterval);
      this.heartbeatInterval = null;
    }
  }

  /**
   * Publish a new presence status. Failures are logged, not thrown.
   *
   * @param {string} status - Status value to publish (e.g. 'busy').
   * @param {object} [extra] - Extra fields merged last into the payload.
   * @returns {Promise<void>}
   */
  async updatePresenceStatus(status, extra = {}) {
    try {
      await this.runnerChannel.presence.update({
        ...this.presenceData,
        status,
        lastSeen: Date.now(),
        ...extra,
      });
    } catch (err) {
      log(`Warning: Failed to update presence status: ${err.message}`);
    }
  }

  /**
   * Handle a 'claim' message from the runner channel.
   *
   * Claims for other runners are ignored. A claim received while a session is
   * active, or whose session cannot be prepared, is rejected through a
   * 'claim-response'. Otherwise the claim is accepted and this method does
   * not resolve until the session has ended.
   *
   * @param {{data: object}} msg - Message whose `data` carries targetRunner,
   *   sandboxId, channel, token and config.
   * @returns {Promise<void>}
   */
  async handleClaim(msg) {
    const claim = msg?.data;

    // A message without an object payload cannot be addressed to anyone.
    if (!claim || typeof claim !== 'object') {
      log('⚠️ Ignoring malformed claim message (missing data)');
      return;
    }

    // Check if this claim is for us
    if (claim.targetRunner !== `runner-${this.runnerId}`) {
      log(`Ignoring claim for ${claim.targetRunner} (we are runner-${this.runnerId})`);
      return;
    }

    log(`📥 Received claim for sandbox: ${claim.sandboxId}`);
    log(`[DEBUG] handleClaim started at ${Date.now()}`);

    if (this.currentSession) {
      log(`⚠️ Already in a session (${this.currentSession.sandboxId}), rejecting claim`);
      // Publish rejection so the API can try another runner
      await this.publishClaimResponse(claim.sandboxId, false, 'already_busy');
      return;
    }

    // Mark as busy FIRST (synchronously, before any await) to prevent
    // concurrent claims
    this.currentSession = { sandboxId: claim.sandboxId, channel: claim.channel };

    // Connect to session channel BEFORE acking so we're ready for commands
    let ablyService;
    let automation;
    try {
      log(`[DEBUG] Starting prepareSession at ${Date.now()}`);
      ({ ablyService, automation } = await this.prepareSession(claim));
      log(`[DEBUG] prepareSession completed at ${Date.now()}`);
    } catch (err) {
      log(`❌ Failed to prepare session: ${err.message}`);
      this.currentSession = null;
      await this.publishClaimResponse(claim.sandboxId, false, `prepare_failed: ${err.message}`);
      return;
    }

    // Now that we're subscribed and ready, ack the claim
    await this.publishClaimResponse(claim.sandboxId, true);

    // Mark as busy in presence (stay in channel, just update status)
    await this.updatePresenceStatus('busy', {
      sandboxId: claim.sandboxId,
      claimedAt: Date.now(),
    });

    // Run the session (waits for disconnect)
    await this.runSession(claim.sandboxId, ablyService, automation);
  }

  /**
   * Publish a claim-response on the runner channel so the API
   * knows whether the claim was accepted or rejected.
   * Failures are logged, not thrown.
   *
   * @param {string} sandboxId - Sandbox the claim was for.
   * @param {boolean} accepted - Whether the claim was accepted.
   * @param {string} [reason] - Defaults to 'ok' or 'rejected'.
   * @returns {Promise<void>}
   */
  async publishClaimResponse(sandboxId, accepted, reason) {
    try {
      log(`[DEBUG] About to publish claim-response. Channel state: ${this.runnerChannel.state}, Ably state: ${this.ably.connection.state}`);
      const payload = {
        runnerId: this.runnerId,
        targetRunner: `runner-${this.runnerId}`,
        sandboxId,
        accepted,
        reason: reason || (accepted ? 'ok' : 'rejected'),
        timestamp: Date.now(),
      };
      log(`[DEBUG] claim-response payload: ${JSON.stringify(payload)}`);
      await this.runnerChannel.publish('claim-response', payload);
      log(`Published claim-response: ${accepted ? 'accepted' : 'rejected'} (sandbox=${sandboxId})`);
    } catch (err) {
      log(`⚠️ Failed to publish claim-response: ${err.message}`);
      log(`[DEBUG] Publish error stack: ${err.stack}`);
    }
  }

  /**
   * Prepare a session — connect to Ably and subscribe to session channel.
   * Returns the connected ablyService and automation, but does NOT
   * wait for disconnect. Call runSession() after acking the claim.
   *
   * @param {object} claim - Claim payload (sandboxId, channel, token, config).
   * @returns {Promise<{ablyService: object, automation: object}>}
   * @throws Whatever `ablyService.connect()` rejects with.
   */
  async prepareSession(claim) {
    const { sandboxId, channel, token } = claim;

    log(`🔌 Starting session: ${sandboxId}`);

    // Create automation instance for this session with sandbox context
    const automation = new Automation({
      sandboxId,
      apiRoot: API_ROOT,
      apiKey: API_KEY,
    });

    // Connect to session channel
    const ablyService = new AblyService({
      automation,
      ablyToken: token,
      channel,
      sandboxId,
      clientId: `agent-${sandboxId}`,
      apiRoot: API_ROOT,
      apiKey: API_KEY,
      updateInfo: this.updateInfo,
    });

    ablyService.on('log', (msg) => log(`[session] ${msg}`));
    ablyService.on('error', (err) => log(`[session] ERROR: ${err.message}`));

    // NOTE(review): if connect() rejects, the service and automation created
    // above are not released here — confirm whether they hold resources.
    await ablyService.connect();
    log(`✅ Session active: ${sandboxId}`);

    return { ablyService, automation };
  }

  /**
   * Run a session — wait for it to end (disconnect or error), then clean up.
   *
   * Each cleanup step is isolated so that a failure while closing the session
   * connection or cleaning up automation can never leave the runner marked
   * as busy.
   *
   * @param {string} sandboxId - Sandbox ID, used for logging.
   * @param {object} ablyService - Connected session service from prepareSession().
   * @param {object} automation - Automation instance from prepareSession().
   * @returns {Promise<void>}
   */
  async runSession(sandboxId, ablyService, automation) {
    try {
      // Wait for session to end (disconnect or error)
      await new Promise((resolve) => {
        ablyService.on('disconnected', resolve);
        ablyService.on('error', resolve);
      });

      log(`📤 Session ended: ${sandboxId}`);
    } catch (err) {
      log(`❌ Session error: ${err.message}`);
    }

    try {
      await ablyService.close();
    } catch (err) {
      log(`Warning: Failed to close session connection: ${err.message}`);
    }

    try {
      automation.cleanup();
    } catch (err) {
      log(`Warning: Automation cleanup failed: ${err.message}`);
    }

    this.currentSession = null;

    if (this.shuttingDown) {
      return;
    }

    if (SINGLE_SESSION) {
      log('Single session mode — exiting');
      await this.shutdown();
    } else {
      // Update presence status back to available (stay in channel)
      await this.updatePresenceStatus('available', {
        sandboxId: null,
        claimedAt: null,
      });
      log('🔄 Ready for next session');
    }
  }

  /**
   * Leave presence, close the Ably client and exit the process with code 0.
   *
   * @returns {Promise<void>} Never resolves in practice: the process exits.
   */
  async shutdown() {
    this.shuttingDown = true;
    log('Shutting down...');

    await this.leavePresence();

    if (this.ably) {
      this.ably.close();
    }

    process.exit(0);
  }
}
533
+
534
+ // ─── Main ────────────────────────────────────────────────────────────────────
535
+
536
const runner = new PresenceRunner();

/**
 * Entry point: log the runner version, start the runner, then install the
 * signal handlers and a no-op timer that keeps the process alive.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const version = getLocalVersion() || 'unknown';
  log(`Runner version: ${version}`);

  // Updates are applied via SSM before config is written to the instance.
  // No self-update needed here.
  await runner.start();

  // Graceful shutdown on either termination signal
  for (const signal of ['SIGINT', 'SIGTERM']) {
    process.on(signal, () => runner.shutdown());
  }

  // Keep alive
  setInterval(() => {}, 60000);
}

// Any start-up failure is logged, dumped to stderr, and ends the process
// with exit code 1.
main().catch((err) => {
  log(`Fatal error: ${err.message}`);
  console.error(err);
  process.exit(1);
});