@testdriverai/runner 7.8.0-canary.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,786 @@
1
+ /**
2
+ * automation.js — Cross-platform desktop automation module (Node.js)
3
+ *
4
+ * Uses pyautogui (Python) via subprocess calls for mouse, keyboard, and
5
+ * screenshot operations. The pyautogui + pyautogui-cli packages are installed
6
+ * via pip in the Dockerfile / packer build. Uses child_process for exec,
7
+ * focus-window, get-active-window, and network commands.
8
+ *
9
+ * Same command interface as pyautogui-cli.py:
10
+ * dispatch(command, data) → Promise<result>
11
+ *
12
+ * Protocol (matches pyautogui-cli.py):
13
+ * Returns the "result" value on success (true, base64 string, object, etc.)
14
+ * Throws on error (caller wraps into { error: message })
15
+ */
16
+ const { spawn, execFile } = require('child_process');
17
+ const path = require('path');
18
+ const fs = require('fs');
19
+ const os = require('os');
20
+ const { EventEmitter } = require('events');
21
+
// Platform flags, evaluated once at module load.
const IS_WINDOWS = process.platform === 'win32';
const IS_LINUX = process.platform === 'linux';
const IS_MACOS = process.platform === 'darwin';

// ─── Ensure DISPLAY is set on Linux ──────────────────────────────────────────
// The entrypoint runs the runner as 'user' (via runuser) so it shares the X
// session directly. DISPLAY should already be inherited, but set a fallback
// just in case. An inherited value is never overwritten.
if (IS_LINUX && !process.env.DISPLAY) {
  process.env.DISPLAY = ':0';
}

// ─── Configuration ───────────────────────────────────────────────────────────

// TD_API_ROOT wins over API_ROOT; the public endpoint is the last resort.
const API_ROOT = process.env.TD_API_ROOT || process.env.API_ROOT || 'https://api.testdriver.ai';
// May be undefined; consumers are expected to check before using it.
const API_KEY = process.env.TD_API_KEY;

// ─── pyautogui via Python subprocess ─────────────────────────────────────────
// Calls pyautogui functions via `python3 -c` (or `python -c` on Windows).
// Each command spawns a short-lived Python process. The pyautogui library is
// installed via `pip install pyautogui-cli` (which depends on pyautogui).
//
// String arguments (text, key names) are passed via sys.argv to avoid
// shell injection and escaping issues.

const PYTHON = IS_WINDOWS ? 'python' : 'python3';

// Prefix prepended to every Python snippet. On Linux it only supplies ':0' when
// DISPLAY is missing (setdefault), mirroring the fallback semantics used on the
// Node side instead of clobbering a DISPLAY the runner inherited.
const PY_IMPORT = IS_LINUX
  ? "import os; os.environ.setdefault('DISPLAY', ':0'); import pyautogui, sys; pyautogui.FAILSAFE = False; "
  : 'import pyautogui, sys; pyautogui.FAILSAFE = False; ';
51
+
/**
 * Run a pyautogui Python script via subprocess.
 *
 * The script is executed as `<PYTHON> -c "<PY_IMPORT><script>" <extraArgs...>`,
 * so no shell is involved and extra arguments cannot be interpreted as code.
 *
 * @param {string} script — Python code (pyautogui + sys are already imported via PY_IMPORT prefix)
 * @param {string[]} [extraArgs=[]] — additional args available as sys.argv[1], sys.argv[2], etc.
 *   Each value is converted with String() before being passed.
 * @param {number} [timeout=15000] — subprocess timeout in ms
 * @returns {Promise<string>} stdout output (trimmed)
 * @throws {Error} (as a rejection) when the process cannot be spawned, exits
 *   non-zero, or is killed by the timeout. The message always starts with
 *   "pyautogui command failed: " and the underlying error is kept as `cause`.
 */
function runPyAutoGUI(script, extraArgs = [], timeout = 15000) {
  return new Promise((resolve, reject) => {
    // Provide DISPLAY=:0 only as a fallback so an inherited display is respected.
    const env = { ...process.env };
    if (IS_LINUX) {
      env.DISPLAY = env.DISPLAY || ':0';
    }

    const argv = ['-c', PY_IMPORT + script, ...extraArgs.map(String)];
    const options = { encoding: 'utf-8', timeout, env };

    execFile(PYTHON, argv, options, (err, stdout, stderr) => {
      if (!err) {
        resolve((stdout || '').trim());
        return;
      }

      const detail = (stderr || '').trim();
      let msg;
      if (err.killed === true && timeout > 0) {
        // execFile marks the error as `killed` when its own timeout fired. The
        // default message is just "Command failed: <whole command line>", which
        // hides the real reason, so spell it out.
        msg = detail ? `timed out after ${timeout}ms: ${detail}` : `timed out after ${timeout}ms`;
      } else {
        msg = detail || err.message;
      }
      reject(new Error(`pyautogui command failed: ${msg}`, { cause: err }));
    });
  });
}
80
+
81
+ // ─── Persistent shell session ────────────────────────────────────────────────
82
+
/**
 * Length of the longest suffix of `text` that is a proper prefix of `marker`.
 * Used to hold back output that might be the beginning of the end-of-output
 * delimiter until the next chunk shows whether it really is.
 */
function trailingPartialMatch(text, marker) {
  const longest = Math.min(text.length, marker.length - 1);
  for (let length = longest; length > 0; length -= 1) {
    if (text.endsWith(marker.slice(0, length))) return length;
  }
  return 0;
}

/**
 * A long-lived shell (bash on POSIX, PowerShell on Windows) that executes
 * commands one at a time. Shell state (cwd, variables) persists between
 * commands. Completion is detected by echoing a unique delimiter after each
 * command and watching stdout for it.
 */
class ShellSession {
  constructor() {
    this._process = null;
    this._stderrLines = [];
    // Tail of the promise chain that serializes run() calls in FIFO order.
    this._queue = Promise.resolve();
  }

  /**
   * Spawn the shell if there is none, or if the previous one has exited.
   * Handlers are bound to the specific child so a late event from an old,
   * killed shell can never clobber the state of its replacement.
   */
  _ensureStarted() {
    if (this._process && this._process.exitCode === null) return;

    const [file, args] = IS_WINDOWS
      ? ['powershell', ['-NoExit', '-ExecutionPolicy', 'Bypass', '-Command', '-']]
      : ['/bin/bash', ['--norc', '--noprofile', '-i']];

    const proc = spawn(file, args, {
      stdio: ['pipe', 'pipe', 'pipe'],
      env: { ...process.env },
    });
    this._process = proc;

    // Decode at the stream level so multi-byte characters that straddle two
    // chunks are not corrupted.
    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');

    proc.stderr.on('data', (chunk) => {
      this._stderrLines.push(chunk.toString());
    });

    // Writing to a dead shell emits 'error' on stdin (EPIPE); without a
    // listener that would be an uncaught exception.
    proc.stdin.on('error', (err) => {
      console.error(`[automation] Shell stdin error: ${err.message}`);
    });

    proc.on('error', (err) => {
      console.error(`[automation] Shell error: ${err.message}`);
    });

    proc.on('exit', (code) => {
      console.log(`[automation] Shell exited (code=${code})`);
      // Only forget the process if it is still the current one.
      if (this._process === proc) {
        this._process = null;
      }
    });
  }

  /**
   * Run a command in the persistent shell session.
   *
   * Calls are serialized: a command starts only after all previously
   * submitted commands have settled. This method never rejects; failures are
   * reported through the result object.
   *
   * NOTE: when the command completes, `returncode` is always 0 and `success`
   * is always true — the command's own exit status is not captured. Non-zero
   * values are only reported for timeouts (-1), internal errors (-1) and a
   * shell that exits before the delimiter is seen (the shell's exit code).
   *
   * @param {string} command - The command to run
   * @param {number} [timeout=30] - Timeout in seconds
   * @param {object} [options] - Additional options
   * @param {function} [options.onChunk] - Callback for streaming stdout chunks: (text: string) => void
   * @returns {Promise<{stdout: string, stderr: string, returncode: number, success: boolean}>}
   */
  async run(command, timeout = 30, options = {}) {
    const { onChunk } = options ?? {};

    // Join the queue: remember the current tail, install our own, wait our turn.
    const previous = this._queue;
    let release;
    this._queue = new Promise((resolve) => {
      release = resolve;
    });
    await previous;

    try {
      this._ensureStarted();
      return await this._execute(command, timeout, onChunk);
    } catch (err) {
      return {
        stdout: '',
        stderr: err.message,
        returncode: -1,
        success: false,
      };
    } finally {
      release();
    }
  }

  /**
   * Send one command to the running shell and resolve when its delimiter is
   * seen, the timeout elapses, or the shell closes. Caller must hold the queue.
   */
  _execute(command, timeout, onChunk) {
    // Pin the child for the lifetime of this command; this._process may be
    // nulled or replaced by event handlers while we wait.
    const proc = this._process;

    const delimiter = `###END_OF_OUTPUT_${Date.now()}_${Math.random().toString(36).slice(2)}###`;
    const fullCommand = IS_WINDOWS
      ? `${command}\nWrite-Output '${delimiter}'\n`
      : `${command}\necho '${delimiter}'\n`;

    // Clear stderr buffer
    this._stderrLines = [];

    return new Promise((resolve) => {
      let buffer = ''; // everything received on stdout for this command
      let streamed = 0; // how much of `buffer` has been handed to onChunk
      let settled = false;

      const collectStderr = () => this._stderrLines.join('').replace(/\n$/, '');

      // Forward buffer[streamed, end) to the caller's streaming callback.
      const stream = (end) => {
        if (end <= streamed) return;
        const text = buffer.slice(streamed, end);
        streamed = end;
        if (!onChunk) return;
        try {
          onChunk(text);
        } catch (e) {
          // Don't let streaming errors break the command
          console.warn('[shell] onChunk callback error:', e.message);
        }
      };

      const finish = (result) => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        proc.stdout.removeListener('data', onData);
        proc.removeListener('close', onClose);
        resolve(result);
      };

      const onData = (chunk) => {
        buffer += chunk.toString();

        // Search the accumulated buffer, not the single chunk, so a delimiter
        // split across chunks is still found.
        const at = buffer.indexOf(delimiter);
        if (at === -1) {
          // Stream everything except a tail that could be the start of the delimiter.
          stream(buffer.length - trailingPartialMatch(buffer, delimiter));
          return;
        }

        // Output that arrived in the same chunk as the delimiter must still be
        // streamed; the delimiter itself never is.
        stream(at);
        finish({
          stdout: buffer.slice(0, at).replace(/\n$/, ''),
          stderr: collectStderr(),
          returncode: 0,
          success: true,
        });
      };

      // The shell went away (e.g. the command was `exit`) before the delimiter
      // appeared: report what we have instead of waiting for the timeout.
      const onClose = (code) => {
        stream(buffer.length);
        finish({
          stdout: buffer.replace(/\n$/, ''),
          stderr: collectStderr() || 'Shell exited before the command completed',
          returncode: typeof code === 'number' ? code : -1,
          success: code === 0,
        });
      };

      const timer = setTimeout(() => {
        // Kill the shell process so the hung command doesn't block
        // subsequent commands. A fresh shell will be spawned on next run().
        console.warn(`[shell] Command timed out after ${timeout}s — killing shell to recover`);
        stream(buffer.length);
        try {
          proc.stdin.end();
          proc.kill('SIGKILL');
        } catch (e) {
          console.warn('[shell] Failed to kill timed-out shell:', e.message);
        }
        if (this._process === proc) {
          this._process = null;
        }
        finish({
          stdout: '',
          stderr: 'Command timed out',
          returncode: -1,
          success: false,
        });
      }, timeout * 1000);

      proc.stdout.on('data', onData);
      proc.on('close', onClose);
      proc.stdin.write(fullCommand);
    });
  }

  /** Terminate the shell, if any. Best effort: errors while stopping are ignored. */
  cleanup() {
    const proc = this._process;
    if (!proc) return;
    this._process = null;
    try {
      proc.stdin.end();
      proc.kill();
    } catch {
      // The process may already be gone; nothing useful to do about it.
    }
  }
}
236
+
237
+ // ─── Script runner helper (matches pyautogui-cli.py's run_script) ────────────
238
+
/**
 * Run an external program to completion and capture its output.
 *
 * No shell is involved: `args[0]` is the executable and the remaining entries
 * are passed to it verbatim. The returned promise never rejects — spawn
 * failures, invalid arguments and timeouts are all reported via the result.
 *
 * @param {string[]} args - Executable followed by its arguments
 * @param {number} [timeout=30] - Timeout in seconds; the process is killed when it elapses
 * @returns {Promise<{stdout: string, stderr: string, returncode: number, success: boolean}>}
 *   `returncode` is the exit code, or -1 on timeout / failure to start.
 */
function runScript(args, timeout = 30) {
  return new Promise((resolve) => {
    const fail = (message) => resolve({
      stdout: '',
      stderr: message,
      returncode: -1,
      success: false,
    });

    let stdout = '';
    let stderr = '';
    let timer;
    let proc;

    try {
      proc = spawn(args[0], args.slice(1), {
        stdio: ['ignore', 'pipe', 'pipe'],
        env: { ...process.env },
      });

      // Register before touching the streams so an asynchronous spawn error
      // always has a listener.
      proc.on('error', (err) => {
        clearTimeout(timer);
        fail(err.message);
      });

      // Stream-level decoding keeps multi-byte characters intact across chunks.
      proc.stdout.setEncoding('utf8');
      proc.stderr.setEncoding('utf8');
      proc.stdout.on('data', (chunk) => { stdout += chunk; });
      proc.stderr.on('data', (chunk) => { stderr += chunk; });
    } catch (err) {
      // spawn() validates its arguments synchronously (e.g. a missing
      // executable name); surface that like any other failure.
      fail(err.message);
      return;
    }

    timer = setTimeout(() => {
      proc.kill();
      fail('Command timed out');
    }, timeout * 1000);

    proc.on('close', (code) => {
      clearTimeout(timer);
      // A no-op if the promise was already resolved by the timeout or an error.
      resolve({
        stdout,
        stderr,
        returncode: code,
        success: code === 0,
      });
    });
  });
}
286
+
287
+ // ─── Automation class ────────────────────────────────────────────────────────
288
+
289
+ class Automation extends EventEmitter {
290
+ constructor(options = {}) {
291
+ super();
292
+ this._shell = new ShellSession();
293
+ this._sandboxId = options.sandboxId;
294
+ this._apiRoot = options.apiRoot;
295
+ this._apiKey = options.apiKey;
296
+ }
297
+
298
+ /**
299
+ * Release all modifier keys to prevent stuck keys.
300
+ * Mirrors pyautogui-cli.py's release_modifiers().
301
+ * Done in a single Python call for efficiency.
302
+ */
303
+ async releaseModifiers() {
304
+ try {
305
+ await runPyAutoGUI(
306
+ "for k in ['shift','shiftleft','shiftright','ctrl','ctrlleft','ctrlright'," +
307
+ "'alt','altleft','altright','command','win','winleft','winright']:\n" +
308
+ " try: pyautogui.keyUp(k)\n" +
309
+ " except: pass"
310
+ );
311
+ } catch {}
312
+ }
313
+
314
+ /**
315
+ * Dispatch a command by name — same interface as pyautogui-cli.py's dispatch_command.
316
+ * @param {string} command - Command name (e.g. 'click', 'screenshot', 'exec')
317
+ * @param {object} data - Command data/parameters
318
+ * @returns {Promise<*>} Result value (true, base64 string, object, etc.)
319
+ */
320
+ async dispatch(command, data = {}) {
321
+ // Normalize command: strip 'commands.' or 'system.' prefix
322
+ let normalizedCommand = command;
323
+ if (command && command.startsWith('commands.')) {
324
+ normalizedCommand = command.slice('commands.'.length);
325
+ } else if (command && command.startsWith('system.')) {
326
+ normalizedCommand = command.slice('system.'.length);
327
+ }
328
+ // Normalize camelCase to lowercase (e.g. leftClick → leftclick, mouseDown → mousedown)
329
+ normalizedCommand = normalizedCommand.toLowerCase();
330
+
331
+ console.log(`[automation] dispatch: "${command}" → "${normalizedCommand}", data keys: ${Object.keys(data).join(',')}`);
332
+ this.emit('log', `dispatch: "${command}" → "${normalizedCommand}"`);
333
+ if (normalizedCommand === 'write') {
334
+ console.log(`[automation] write text: "${data.text}" (length: ${data.text?.length})`);
335
+ this.emit('log', `write text: "${data.text}" (length: ${data.text?.length})`);
336
+ }
337
+ if (normalizedCommand === 'press') {
338
+ console.log(`[automation] press keys:`, JSON.stringify(data.keys || data.key));
339
+ this.emit('log', `press keys: ${JSON.stringify(data.keys || data.key)}`);
340
+ }
341
+
342
+ switch (normalizedCommand) {
343
+ case 'move':
344
+ case 'movemouse': {
345
+ console.log(`[automation] move: (${data.x},${data.y})`);
346
+ await runPyAutoGUI(`pyautogui.moveTo(${data.x}, ${data.y})`);
347
+ return true;
348
+ }
349
+
350
+ case 'moverel': {
351
+ await runPyAutoGUI(`pyautogui.moveRel(${data.x}, ${data.y})`);
352
+ return true;
353
+ }
354
+
355
+ case 'click':
356
+ case 'leftclick': {
357
+ const button = data.button || 'left';
358
+ if (data.x != null && data.y != null) {
359
+ console.log(`[automation] click: (${data.x},${data.y})`);
360
+ await runPyAutoGUI(`pyautogui.click(${data.x}, ${data.y}, button=sys.argv[1])`, [button]);
361
+ } else {
362
+ await runPyAutoGUI(`pyautogui.click(button=sys.argv[1])`, [button]);
363
+ }
364
+ return true;
365
+ }
366
+
367
+ case 'rightclick': {
368
+ if (data.x != null && data.y != null) {
369
+ await runPyAutoGUI(`pyautogui.rightClick(${data.x}, ${data.y})`);
370
+ } else {
371
+ await runPyAutoGUI('pyautogui.rightClick()');
372
+ }
373
+ return true;
374
+ }
375
+
376
+ case 'middleclick': {
377
+ if (data.x != null && data.y != null) {
378
+ await runPyAutoGUI(`pyautogui.middleClick(${data.x}, ${data.y})`);
379
+ } else {
380
+ await runPyAutoGUI('pyautogui.middleClick()');
381
+ }
382
+ return true;
383
+ }
384
+
385
+ case 'doubleclick': {
386
+ if (data.x != null && data.y != null) {
387
+ await runPyAutoGUI(`pyautogui.doubleClick(${data.x}, ${data.y})`);
388
+ } else {
389
+ await runPyAutoGUI('pyautogui.doubleClick()');
390
+ }
391
+ return true;
392
+ }
393
+
394
+ case 'tripleclick': {
395
+ if (data.x != null && data.y != null) {
396
+ await runPyAutoGUI(`pyautogui.tripleClick(${data.x}, ${data.y})`);
397
+ } else {
398
+ await runPyAutoGUI('pyautogui.tripleClick()');
399
+ }
400
+ return true;
401
+ }
402
+
403
+ case 'mousedown':
404
+ case 'mousepress': {
405
+ const button = data.button || 'left';
406
+ if (data.x != null && data.y != null) {
407
+ await runPyAutoGUI(`pyautogui.mouseDown(x=${data.x}, y=${data.y}, button=sys.argv[1])`, [button]);
408
+ } else {
409
+ await runPyAutoGUI(`pyautogui.mouseDown(button=sys.argv[1])`, [button]);
410
+ }
411
+ return true;
412
+ }
413
+
414
+ case 'mouseup':
415
+ case 'mouserelease': {
416
+ const button = data.button || 'left';
417
+ if (data.x != null && data.y != null) {
418
+ await runPyAutoGUI(`pyautogui.mouseUp(x=${data.x}, y=${data.y}, button=sys.argv[1])`, [button]);
419
+ } else {
420
+ await runPyAutoGUI(`pyautogui.mouseUp(button=sys.argv[1])`, [button]);
421
+ }
422
+ return true;
423
+ }
424
+
425
+ case 'scroll': {
426
+ const raw = data.amount || 0;
427
+ // pyautogui.scroll(): positive = up, negative = down
428
+ const amount = data.direction === 'down' ? -Math.abs(raw) : Math.abs(raw);
429
+ await runPyAutoGUI(`pyautogui.scroll(${amount})`);
430
+ return true;
431
+ }
432
+
433
+ case 'write': {
434
+ // Type each character — matches pyautogui.write() behavior
435
+ // Text passed via sys.argv[1] to avoid escaping issues
436
+ await runPyAutoGUI('pyautogui.write(sys.argv[1])', [data.text || '']);
437
+ await this.releaseModifiers();
438
+ return true;
439
+ }
440
+
441
+ case 'press': {
442
+ // SDK sends { keys: [...] } (array) or { key: 'x' } (singular)
443
+ if (data.keys && Array.isArray(data.keys)) {
444
+ // Multiple keys = hotkey/combo — pass via sys.argv[1:]
445
+ await runPyAutoGUI('pyautogui.hotkey(*sys.argv[1:])', data.keys);
446
+ } else {
447
+ const key = data.key || data.keys;
448
+ await runPyAutoGUI('pyautogui.press(sys.argv[1])', [key]);
449
+ }
450
+ await this.releaseModifiers();
451
+ return true;
452
+ }
453
+
454
+ case 'hotkey': {
455
+ // Pass all keys via sys.argv[1:]
456
+ await runPyAutoGUI('pyautogui.hotkey(*sys.argv[1:])', data.keys);
457
+ await this.releaseModifiers();
458
+ return true;
459
+ }
460
+
461
+ case 'keydown': {
462
+ await runPyAutoGUI('pyautogui.keyDown(sys.argv[1])', [data.key]);
463
+ return true;
464
+ }
465
+
466
+ case 'keyup': {
467
+ await runPyAutoGUI('pyautogui.keyUp(sys.argv[1])', [data.key]);
468
+ return true;
469
+ }
470
+
471
+ case 'get-mouse-position': {
472
+ const output = await runPyAutoGUI(
473
+ 'pos = pyautogui.position(); print(f"{pos.x},{pos.y}")'
474
+ );
475
+ const [px, py] = output.split(',').map(Number);
476
+ return { x: px, y: py };
477
+ }
478
+
479
+ case 'get-screen-size': {
480
+ const output = await runPyAutoGUI(
481
+ 'size = pyautogui.size(); print(f"{size.width},{size.height}")'
482
+ );
483
+ const [w, h] = output.split(',').map(Number);
484
+ return { width: w, height: h };
485
+ }
486
+
487
+ case 'alert': {
488
+ // No direct equivalent in Node — execute via shell
489
+ if (IS_LINUX) {
490
+ await runScript(['zenity', '--info', '--text', data.text], 10);
491
+ } else if (IS_MACOS) {
492
+ await runScript(['osascript', '-e', `display dialog "${data.text}"`], 10);
493
+ } else {
494
+ await runScript(['powershell', '-Command', `Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.MessageBox]::Show("${data.text}")`], 10);
495
+ }
496
+ return true;
497
+ }
498
+
499
+ case 'focus-window': {
500
+ return await this._focusWindow(data);
501
+ }
502
+
503
+ case 'focus-application': {
504
+ return await this._focusWindow({
505
+ ...data,
506
+ title: data.title || data.name || data.appName || '',
507
+ });
508
+ }
509
+
510
+ case 'get-active-window': {
511
+ return await this._getActiveWindow(data);
512
+ }
513
+
514
+ case 'network': {
515
+ return await this._getNetwork(data);
516
+ }
517
+
518
+ case 'exec': {
519
+ const timeout = data.timeout || 30;
520
+ return await this._shell.run(data.command, timeout);
521
+ }
522
+
523
+ // SDK sends 'commands.run' for exec — map to shell command
524
+ case 'run': {
525
+ const timeout = Math.ceil((data.timeout || 300000) / 1000); // ms to seconds
526
+ const requestId = data.requestId;
527
+
528
+ // Buffer stdout chunks to ~16KB before emitting over Ably.
529
+ // This reduces message count while keeping each message well under
530
+ // Ably's 64KB limit. The SDK accumulates these chunks and reconstructs
531
+ // the full stdout — the final response only carries returncode + stderr.
532
+ const CHUNK_FLUSH_SIZE = 16 * 1024; // 16KB
533
+ let chunkBuffer = '';
534
+ const flushChunkBuffer = () => {
535
+ if (chunkBuffer.length > 0) {
536
+ this.emit('exec.output', { requestId, chunk: chunkBuffer });
537
+ chunkBuffer = '';
538
+ }
539
+ };
540
+
541
+ try {
542
+ const result = await this._shell.run(data.command, timeout, {
543
+ onChunk: (text) => {
544
+ chunkBuffer += text;
545
+ if (chunkBuffer.length >= CHUNK_FLUSH_SIZE) {
546
+ flushChunkBuffer();
547
+ }
548
+ },
549
+ });
550
+
551
+ // Flush any remaining buffered output
552
+ flushChunkBuffer();
553
+
554
+ // Only send returncode + stderr in the final response.
555
+ // stdout was already streamed via exec.output chunks — sending it
556
+ // again here would duplicate data AND can exceed Ably's 64KB message
557
+ // limit when the command produces large output (e.g. dashcam stop).
558
+ return {
559
+ out: {
560
+ returncode: result.returncode,
561
+ stdout: '',
562
+ stderr: result.stderr,
563
+ },
564
+ };
565
+ } catch (err) {
566
+ flushChunkBuffer();
567
+ return {
568
+ out: {
569
+ returncode: 1,
570
+ stdout: '',
571
+ stderr: err.message,
572
+ },
573
+ };
574
+ }
575
+ }
576
+
577
+ case 'extract':
578
+ case 'remember':
579
+ case 'screenshot': {
580
+ console.log(`[automation] Handling ${normalizedCommand} command - will capture and upload to S3`);
581
+ // For extract/remember: capture screenshot, upload to S3, return s3Key
582
+ // SDK will then call API with the s3Key
583
+ // For system.screenshot (normalized to 'screenshot'): same flow (return s3Key instead of base64)
584
+
585
+ // Capture screenshot locally
586
+ console.log('[automation] Step 1: Capturing screenshot...');
587
+ const screenshot = await this._captureScreenshot();
588
+ console.log(`[automation] Step 2: Screenshot captured, size: ${screenshot.length} bytes (base64)`);
589
+ const buffer = Buffer.from(screenshot, 'base64');
590
+ console.log(`[automation] Step 3: Buffer created, size: ${buffer.length} bytes`);
591
+
592
+ // Upload screenshot to S3 and return key
593
+ console.log('[automation] Step 4: Uploading to S3...');
594
+ const s3Key = await this._uploadToS3(buffer, this._sandboxId, 'image/png');
595
+ console.log(`[automation] Step 5: Upload complete, s3Key: ${s3Key}`);
596
+
597
+ return { s3Key };
598
+ }
599
+
600
+ case 'ping':
601
+ return 'pong';
602
+
603
+ default:
604
+ throw new Error(`Unknown command: ${command}`);
605
+ }
606
+ }
607
+
608
+ // ── S3 Upload (for large screenshots that exceed Ably limits) ──────
609
+
610
+ async _uploadToS3(buffer, sandboxId, contentType = 'image/jpeg') {
611
+ const apiKey = this._apiKey || API_KEY;
612
+ if (!apiKey || !sandboxId) {
613
+ throw new Error('API_KEY and sandboxId required for S3 upload');
614
+ }
615
+
616
+ // Use instance-level apiRoot (passed from runner) with fallback to module-level constant
617
+ const apiRoot = this._apiRoot || API_ROOT;
618
+
619
+ // Get presigned URL from API (30s timeout)
620
+ const response = await fetch(`${apiRoot}/api/v7/runner/upload-url`, {
621
+ method: 'POST',
622
+ headers: { 'Content-Type': 'application/json' },
623
+ body: JSON.stringify({
624
+ apiKey,
625
+ sandboxId,
626
+ contentType,
627
+ }),
628
+ signal: AbortSignal.timeout(30000),
629
+ });
630
+
631
+ if (!response.ok) {
632
+ const text = await response.text();
633
+ throw new Error(`Failed to get upload URL: ${response.status} ${text}`);
634
+ }
635
+
636
+ const { uploadUrl, s3Key } = await response.json();
637
+ console.log(`[automation] Got S3 upload URL, s3Key: ${s3Key}`);
638
+
639
+ // Upload to S3 (30s timeout)
640
+ const uploadResponse = await fetch(uploadUrl, {
641
+ method: 'PUT',
642
+ headers: { 'Content-Type': contentType },
643
+ body: buffer,
644
+ signal: AbortSignal.timeout(30000),
645
+ });
646
+
647
+ if (!uploadResponse.ok) {
648
+ console.error(`[automation] S3 upload failed: status=${uploadResponse.status}`);
649
+ throw new Error(`S3 upload failed: ${uploadResponse.status}`);
650
+ }
651
+
652
+ console.log(`[automation] S3 upload successful, returning s3Key: ${s3Key}`);
653
+ return s3Key;
654
+ }
655
+
656
+ // ── Screenshot (highest quality PNG, via pyautogui → temp file → sharp) ──
657
+
658
+ async _captureScreenshot() {
659
+ const sharp = require('sharp');
660
+ const tmpFile = path.join(os.tmpdir(), `td_screenshot_${Date.now()}.png`);
661
+
662
+ try {
663
+ // Capture screenshot via pyautogui → saves to temp file
664
+ // Python handles Retina downscale: if physical size differs from logical,
665
+ // the image is resized to logical dimensions before saving.
666
+ await runPyAutoGUI(
667
+ 'img = pyautogui.screenshot()\n' +
668
+ 'logical = pyautogui.size()\n' +
669
+ 'if img.size[0] != logical[0] or img.size[1] != logical[1]:\n' +
670
+ ' from PIL import Image\n' +
671
+ ' img = img.resize((logical[0], logical[1]), Image.LANCZOS)\n' +
672
+ 'img.save(sys.argv[1], format="PNG")',
673
+ [tmpFile],
674
+ 20000
675
+ );
676
+
677
+ // Read the PNG and re-encode with sharp (lossless, no compression)
678
+ const pngBuffer = fs.readFileSync(tmpFile);
679
+ const buffer = await sharp(pngBuffer)
680
+ .png({ compressionLevel: 0 })
681
+ .toBuffer();
682
+
683
+ return buffer.toString('base64');
684
+ } finally {
685
+ // Clean up temp file
686
+ try { fs.unlinkSync(tmpFile); } catch {}
687
+ }
688
+ }
689
+
690
+ // ── Focus window (platform-specific) ───────────────────────────────
691
+
692
+ async _focusWindow(data) {
693
+ const title = data.title || '';
694
+ const timeout = data.timeout || 30;
695
+
696
+ if (IS_WINDOWS) {
697
+ const scriptPath = path.join(__dirname, '..', 'focusWindow.ps1');
698
+ const action = data.action || 'Focus';
699
+ return await runScript(
700
+ ['powershell', '-ExecutionPolicy', 'Bypass', '-File', scriptPath, title, action],
701
+ timeout,
702
+ );
703
+ } else {
704
+ // Linux: try jumpapp first, then xdotool fallback
705
+ let result = await runScript(['jumpapp', '-f', title], Math.min(timeout, 10));
706
+ if (!result.success) {
707
+ result = await runScript(
708
+ ['xdotool', 'search', '--name', title, 'windowactivate'],
709
+ Math.min(timeout, 10),
710
+ );
711
+ }
712
+ return result;
713
+ }
714
+ }
715
+
716
+ // ── Get active window (platform-specific) ──────────────────────────
717
+
718
+ async _getActiveWindow(data) {
719
+ const timeout = data.timeout || 10;
720
+
721
+ if (IS_WINDOWS) {
722
+ const scriptPath = path.join(__dirname, '..', 'getActiveWindow.ps1');
723
+ const result = await runScript(
724
+ ['powershell', '-ExecutionPolicy', 'Bypass', '-File', scriptPath],
725
+ timeout,
726
+ );
727
+ if (result.success) {
728
+ try {
729
+ return JSON.parse(result.stdout.trim());
730
+ } catch {
731
+ return result;
732
+ }
733
+ }
734
+ return result;
735
+ } else {
736
+ // Linux: xdotool
737
+ const result = await runScript(
738
+ ['bash', '-c', "xdotool getactivewindow getwindowname 2>/dev/null || echo ''"],
739
+ timeout,
740
+ );
741
+ return {
742
+ name: result.success ? result.stdout.trim() : '',
743
+ success: result.success,
744
+ };
745
+ }
746
+ }
747
+
748
+ // ── Network info (platform-specific) ───────────────────────────────
749
+
750
+ async _getNetwork(data) {
751
+ const timeout = data.timeout || 10;
752
+
753
+ if (IS_WINDOWS) {
754
+ const scriptPath = path.join(__dirname, '..', 'network.ps1');
755
+ const result = await runScript(
756
+ ['powershell', '-ExecutionPolicy', 'Bypass', '-File', scriptPath],
757
+ timeout,
758
+ );
759
+ if (result.success) {
760
+ try {
761
+ return JSON.parse(result.stdout.trim());
762
+ } catch {
763
+ return result;
764
+ }
765
+ }
766
+ return result;
767
+ } else {
768
+ const result = await runScript(
769
+ ['bash', '-c', "ip -j addr show 2>/dev/null || ifconfig -a 2>/dev/null || echo '{}'"],
770
+ timeout,
771
+ );
772
+ return {
773
+ stdout: result.stdout,
774
+ success: result.success,
775
+ };
776
+ }
777
+ }
778
+
779
+ // ── Cleanup ────────────────────────────────────────────────────────
780
+
781
+ cleanup() {
782
+ this._shell.cleanup();
783
+ }
784
+ }
785
+
786
+ module.exports = { Automation };