@testdriverai/runner 7.8.0-test.50 → 7.8.0-test.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -280,9 +280,9 @@ class PresenceRunner {
280
280
  await new Promise((resolve, reject) => {
281
281
  this.ably.connection.on('connected', resolve);
282
282
  this.ably.connection.on('failed', (err) => {
283
- reject(new Error(`Ably connection failed: ${err?.reason?.message || 'unknown'}`));
283
+ reject(new Error(`Realtime connection failed: ${err?.reason?.message || 'unknown'}`));
284
284
  });
285
- setTimeout(() => reject(new Error('Ably connection timeout')), 30000);
285
+ setTimeout(() => reject(new Error('Realtime connection timeout')), 30000);
286
286
  });
287
287
 
288
288
  log('Connected to Ably');
@@ -291,7 +291,7 @@ class PresenceRunner {
291
291
  this.ably.connection.on((stateChange) => {
292
292
  const { current, previous, reason, retryIn } = stateChange;
293
293
  const reasonMsg = reason ? (reason.message || reason.code || String(reason)) : undefined;
294
- log(`[ably] Presence connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}${retryIn ? ' (retryIn=' + retryIn + 'ms)' : ''}`);
294
+ log(`[realtime] Presence connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}${retryIn ? ' (retryIn=' + retryIn + 'ms)' : ''}`);
295
295
  });
296
296
 
297
297
  // Get runner channel and enter presence
@@ -201,10 +201,10 @@ class AblyService extends EventEmitter {
201
201
  resolve();
202
202
  });
203
203
  this._ably.connection.on('failed', () => {
204
- reject(new Error('Ably connection failed'));
204
+ reject(new Error('Realtime connection failed'));
205
205
  });
206
206
  setTimeout(() => {
207
- reject(new Error('Ably connection timeout (30s)'));
207
+ reject(new Error('Realtime connection timeout (30s)'));
208
208
  }, 30000);
209
209
  });
210
210
 
@@ -334,7 +334,7 @@ class AblyService extends EventEmitter {
334
334
  };
335
335
  this._commandSubscription = await this._sessionChannel.subscribe('command', this._onCommandMsg);
336
336
 
337
- // ─── Ably connection state monitoring → Sentry ─────────────────────────
337
+ // ─── Realtime connection state monitoring → Sentry ─────────────────────────
338
338
  this._ably.connection.on((stateChange) => {
339
339
  const { current, previous, reason, retryIn } = stateChange;
340
340
  const reasonMsg = reason ? (reason.message || reason.code || String(reason)) : undefined;
@@ -349,28 +349,28 @@ class AblyService extends EventEmitter {
349
349
  // Preserve original behavior
350
350
  if (current === 'disconnected') {
351
351
  this._connected = false;
352
- this.emit('log', `Ably connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}${retryIn ? ' (retryIn=' + retryIn + 'ms)' : ''}`);
352
+ this.emit('log', `Realtime connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}${retryIn ? ' (retryIn=' + retryIn + 'ms)' : ''}`);
353
353
  this.emit('log', 'Ably disconnected — will auto-reconnect');
354
354
  } else if (current === 'connected' && previous !== 'initialized') {
355
355
  if (!this._connected) {
356
356
  this._connected = true;
357
- this.emit('log', `Ably connection: ${previous} → ${current}`);
357
+ this.emit('log', `Realtime connection: ${previous} → ${current}`);
358
358
  this.emit('log', 'Ably reconnected');
359
359
  }
360
360
  } else if (current === 'failed') {
361
361
  this._connected = false;
362
- this.emit('log', `Ably connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}`);
363
- this.emit('error', new Error('Ably connection failed'));
362
+ this.emit('log', `Realtime connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}`);
363
+ this.emit('error', new Error('Realtime connection failed'));
364
364
  } else if (current === 'suspended') {
365
365
  this._connected = false;
366
- this.emit('log', `Ably connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}`);
366
+ this.emit('log', `Realtime connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}`);
367
367
  this.emit('log', 'Ably suspended — connection lost for extended period, will keep retrying');
368
368
  } else if (current === 'closed') {
369
369
  this._connected = false;
370
- this.emit('log', `Ably connection: ${previous} → ${current}`);
370
+ this.emit('log', `Realtime connection: ${previous} → ${current}`);
371
371
  this.emit('disconnected');
372
372
  } else {
373
- this.emit('log', `Ably connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}`);
373
+ this.emit('log', `Realtime connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}`);
374
374
  }
375
375
 
376
376
  // Capture exceptions for bad states
@@ -380,7 +380,7 @@ class AblyService extends EventEmitter {
380
380
  scope.setTag('ably.state', current);
381
381
  scope.setTag('sandbox.id', this._sandboxId);
382
382
  scope.setContext('ably_connection', { from: previous, to: current, reason: reasonMsg, retryIn });
383
- const err = reason instanceof Error ? reason : new Error('Ably connection state error');
383
+ const err = reason instanceof Error ? reason : new Error('Realtime connection state error');
384
384
  err.name = 'AblyConnectionError';
385
385
  Sentry.captureException(err);
386
386
  });
@@ -651,7 +651,7 @@ class AblyService extends EventEmitter {
651
651
  * Disconnect from Ably and clean up.
652
652
  */
653
653
  async close() {
654
- this.emit('log', 'Closing Ably service...');
654
+ this.emit('log', 'Closing realtime service...');
655
655
 
656
656
  this._stopReadySignal();
657
657
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@testdriverai/runner",
3
- "version": "7.8.0-test.50",
3
+ "version": "7.8.0-test.52",
4
4
  "description": "TestDriver Runner - Ably-based remote automation agent with Node.js automation",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -37,6 +37,9 @@
37
37
  "sharp": "^0.33.0",
38
38
  "uuid": "^9.0.0"
39
39
  },
40
+ "publishConfig": {
41
+ "access": "public"
42
+ },
40
43
  "devDependencies": {
41
44
  "e2b": "^2.12.1"
42
45
  }
package/sandbox-agent.js CHANGED
@@ -236,8 +236,8 @@ async function main() {
236
236
  updateInfo: null, // sandbox-agent doesn't do self-update checks
237
237
  });
238
238
 
239
- ablyService.on('log', (msg) => log(`[ably] ${msg}`));
240
- ablyService.on('error', (err) => log(`[ably] ERROR: ${err.message}`));
239
+ ablyService.on('log', (msg) => log(`[realtime] ${msg}`));
240
+ ablyService.on('error', (err) => log(`[realtime] ERROR: ${err.message}`));
241
241
 
242
242
  await ablyService.connect();
243
243
  log('Agent ready — listening for commands via Ably');
@@ -1,882 +0,0 @@
1
- /**
2
- * automation.js — Cross-platform desktop automation module (Node.js)
3
- *
4
- * Drop-in replacement for pyautogui-cli.py. Uses @nut-tree/nut-js for
5
- * mouse, keyboard, and screenshot operations. Uses child_process for
6
- * exec, focus-window, get-active-window, and network commands.
7
- *
8
- * Same command interface as pyautogui-cli.py:
9
- * dispatch(command, data) → Promise<result>
10
- *
11
- * Protocol (matches pyautogui-cli.py):
12
- * Returns the "result" value on success (true, base64 string, object, etc.)
13
- * Throws on error (caller wraps into { error: message })
14
- */
15
- const { spawn } = require('child_process');
16
- const path = require('path');
17
- const fs = require('fs');
18
- const os = require('os');
19
- const { EventEmitter } = require('events');
20
-
21
- const IS_WINDOWS = process.platform === 'win32';
22
- const IS_LINUX = process.platform === 'linux';
23
- const IS_MACOS = process.platform === 'darwin';
24
-
25
- // ─── Ensure DISPLAY is set on Linux ──────────────────────────────────────────
26
- // The entrypoint runs the runner as 'user' (via runuser) so it shares the X
27
- // session directly. DISPLAY should already be inherited, but set a fallback
28
- // just in case.
29
- if (IS_LINUX && !process.env.DISPLAY) {
30
- process.env.DISPLAY = ':0';
31
- }
32
-
33
- // ─── Configuration ───────────────────────────────────────────────────────────
34
-
35
- const API_ROOT = process.env.API_ROOT || process.env.TD_API_ROOT || 'https://api.testdriver.ai';
36
- const API_KEY = process.env.TD_API_KEY;
37
-
38
- // ─── Lazy-load @nut-tree-fork/nut-js (heavy native deps) ────────────────────
39
-
40
- let _nut = null;
41
- function getNut() {
42
- if (!_nut) {
43
- _nut = require('@nut-tree-fork/nut-js');
44
- // Disable nut.js automatic mouse speed — we want instant moves like pyautogui
45
- _nut.mouse.config.mouseSpeed = 0;
46
- _nut.mouse.config.autoDelayMs = 0;
47
- _nut.keyboard.config.autoDelayMs = 0;
48
- }
49
- return _nut;
50
- }
51
-
52
- // ─── Key name mapping: pyautogui key names → nut-js Key enum ─────────────────
53
-
54
- function mapKey(keyName) {
55
- const { Key } = getNut();
56
-
57
- // Direct mappings for common keys
58
- const keyMap = {
59
- // Letters
60
- 'a': Key.A, 'b': Key.B, 'c': Key.C, 'd': Key.D, 'e': Key.E,
61
- 'f': Key.F, 'g': Key.G, 'h': Key.H, 'i': Key.I, 'j': Key.J,
62
- 'k': Key.K, 'l': Key.L, 'm': Key.M, 'n': Key.N, 'o': Key.O,
63
- 'p': Key.P, 'q': Key.Q, 'r': Key.R, 's': Key.S, 't': Key.T,
64
- 'u': Key.U, 'v': Key.V, 'w': Key.W, 'x': Key.X, 'y': Key.Y,
65
- 'z': Key.Z,
66
-
67
- // Numbers
68
- '0': Key.Num0, '1': Key.Num1, '2': Key.Num2, '3': Key.Num3,
69
- '4': Key.Num4, '5': Key.Num5, '6': Key.Num6, '7': Key.Num7,
70
- '8': Key.Num8, '9': Key.Num9,
71
-
72
- // Function keys
73
- 'f1': Key.F1, 'f2': Key.F2, 'f3': Key.F3, 'f4': Key.F4,
74
- 'f5': Key.F5, 'f6': Key.F6, 'f7': Key.F7, 'f8': Key.F8,
75
- 'f9': Key.F9, 'f10': Key.F10, 'f11': Key.F11, 'f12': Key.F12,
76
-
77
- // Modifiers
78
- 'shift': Key.LeftShift, 'shiftleft': Key.LeftShift, 'shiftright': Key.RightShift,
79
- 'ctrl': Key.LeftControl, 'ctrlleft': Key.LeftControl, 'ctrlright': Key.RightControl,
80
- 'control': Key.LeftControl,
81
- 'alt': Key.LeftAlt, 'altleft': Key.LeftAlt, 'altright': Key.RightAlt,
82
- 'command': Key.LeftSuper, 'cmd': Key.LeftSuper, 'super': Key.LeftSuper,
83
- 'win': Key.LeftSuper, 'winleft': Key.LeftSuper, 'winright': Key.RightSuper,
84
- 'meta': Key.LeftSuper,
85
-
86
- // Navigation
87
- 'enter': Key.Enter, 'return': Key.Enter,
88
- 'tab': Key.Tab,
89
- 'space': Key.Space, ' ': Key.Space,
90
- 'backspace': Key.Backspace,
91
- 'delete': Key.Delete, 'del': Key.Delete,
92
- 'escape': Key.Escape, 'esc': Key.Escape,
93
- 'insert': Key.Insert,
94
- 'home': Key.Home, 'end': Key.End,
95
- 'pageup': Key.PageUp, 'pagedown': Key.PageDown,
96
-
97
- // Arrow keys
98
- 'up': Key.Up, 'down': Key.Down, 'left': Key.Left, 'right': Key.Right,
99
-
100
- // Symbols
101
- 'minus': Key.Minus, '-': Key.Minus,
102
- 'equal': Key.Equal, '=': Key.Equal,
103
- 'bracketleft': Key.LeftBracket, '[': Key.LeftBracket,
104
- 'bracketright': Key.RightBracket, ']': Key.RightBracket,
105
- 'backslash': Key.Backslash, '\\': Key.Backslash,
106
- 'semicolon': Key.Semicolon, ';': Key.Semicolon,
107
- 'quote': Key.Quote, "'": Key.Quote,
108
- 'comma': Key.Comma, ',': Key.Comma,
109
- 'period': Key.Period, '.': Key.Period,
110
- 'slash': Key.Slash, '/': Key.Slash,
111
- 'grave': Key.Grave, '`': Key.Grave,
112
-
113
- // Misc
114
- 'capslock': Key.CapsLock,
115
- 'numlock': Key.NumLock,
116
- 'scrolllock': Key.ScrollLock,
117
- 'printscreen': Key.Print,
118
- 'pause': Key.Pause,
119
- };
120
-
121
- const mapped = keyMap[keyName.toLowerCase()];
122
- if (mapped !== undefined) return mapped;
123
-
124
- // Fallback: try uppercase enum lookup
125
- const upper = keyName.charAt(0).toUpperCase() + keyName.slice(1);
126
- if (Key[upper] !== undefined) return Key[upper];
127
-
128
- throw new Error(`Unknown key: ${keyName}`);
129
- }
130
-
131
- // ─── Persistent shell session ────────────────────────────────────────────────
132
-
133
- class ShellSession {
134
- constructor() {
135
- this._process = null;
136
- this._stderrLines = [];
137
- this._lock = false;
138
- }
139
-
140
- _ensureStarted() {
141
- if (this._process && this._process.exitCode === null) return;
142
-
143
- let cmd;
144
- let env;
145
-
146
- if (IS_WINDOWS) {
147
- cmd = ['powershell', ['-NoExit', '-ExecutionPolicy', 'Bypass', '-Command', '-']];
148
- } else {
149
- cmd = ['/bin/bash', ['--norc', '--noprofile', '-i']];
150
- }
151
- env = { ...process.env };
152
-
153
- this._process = spawn(cmd[0], cmd[1], {
154
- stdio: ['pipe', 'pipe', 'pipe'],
155
- env,
156
- });
157
-
158
- this._process.stderr.on('data', (chunk) => {
159
- this._stderrLines.push(chunk.toString());
160
- });
161
-
162
- this._process.on('error', (err) => {
163
- console.error(`[automation] Shell error: ${err.message}`);
164
- });
165
-
166
- this._process.on('exit', (code) => {
167
- console.log(`[automation] Shell exited (code=${code})`);
168
- this._process = null;
169
- });
170
- }
171
-
172
- /**
173
- * Run a command in the persistent shell session.
174
- * @param {string} command - The command to run
175
- * @param {number} [timeout=30] - Timeout in seconds
176
- * @param {object} [options] - Additional options
177
- * @param {function} [options.onChunk] - Callback for streaming stdout chunks: (text: string) => void
178
- * @returns {Promise<{stdout: string, stderr: string, returncode: number, success: boolean}>}
179
- */
180
- async run(command, timeout = 30, options = {}) {
181
- const { onChunk } = options;
182
-
183
- // Wait for any in-flight command (simple serial lock)
184
- while (this._lock) {
185
- await new Promise(r => setTimeout(r, 50));
186
- }
187
- this._lock = true;
188
-
189
- try {
190
- this._ensureStarted();
191
-
192
- const delimiter = `###END_OF_OUTPUT_${Date.now()}_${Math.random().toString(36).slice(2)}###`;
193
- const fullCommand = IS_WINDOWS
194
- ? `${command}\nWrite-Output '${delimiter}'\n`
195
- : `${command}\necho '${delimiter}'\n`;
196
-
197
- // Clear stderr buffer
198
- this._stderrLines = [];
199
-
200
- return await new Promise((resolve, reject) => {
201
- const timeoutMs = timeout * 1000;
202
- const timer = setTimeout(() => {
203
- cleanup();
204
- resolve({
205
- stdout: '',
206
- stderr: 'Command timed out',
207
- returncode: -1,
208
- success: false,
209
- });
210
- }, timeoutMs);
211
-
212
- const stdoutLines = [];
213
-
214
- const onData = (chunk) => {
215
- const text = chunk.toString();
216
- const lines = text.split('\n');
217
- for (const line of lines) {
218
- if (line.includes(delimiter)) {
219
- cleanup();
220
- const stdout = stdoutLines.join('\n').replace(/\n$/, '');
221
- const stderr = this._stderrLines.join('').replace(/\n$/, '');
222
- resolve({
223
- stdout,
224
- stderr,
225
- returncode: 0,
226
- success: true,
227
- });
228
- return;
229
- }
230
- stdoutLines.push(line);
231
- }
232
-
233
- // Stream chunk to caller if callback provided
234
- if (onChunk) {
235
- try {
236
- onChunk(text);
237
- } catch (e) {
238
- // Don't let streaming errors break the command
239
- console.warn('[shell] onChunk callback error:', e.message);
240
- }
241
- }
242
- };
243
-
244
- const cleanup = () => {
245
- clearTimeout(timer);
246
- this._process.stdout.removeListener('data', onData);
247
- };
248
-
249
- this._process.stdout.on('data', onData);
250
- this._process.stdin.write(fullCommand);
251
- });
252
- } catch (err) {
253
- return {
254
- stdout: '',
255
- stderr: err.message,
256
- returncode: -1,
257
- success: false,
258
- };
259
- } finally {
260
- this._lock = false;
261
- }
262
- }
263
-
264
- cleanup() {
265
- if (this._process) {
266
- try {
267
- this._process.stdin.end();
268
- this._process.kill();
269
- } catch {}
270
- this._process = null;
271
- }
272
- }
273
- }
274
-
275
- // ─── Script runner helper (matches pyautogui-cli.py's run_script) ────────────
276
-
277
- function runScript(args, timeout = 30) {
278
- return new Promise((resolve) => {
279
- const spawnArgs = args;
280
- const env = { ...process.env };
281
-
282
- const proc = spawn(spawnArgs[0], spawnArgs.slice(1), {
283
- stdio: ['ignore', 'pipe', 'pipe'],
284
- env,
285
- });
286
-
287
- let stdout = '';
288
- let stderr = '';
289
-
290
- proc.stdout.on('data', (chunk) => { stdout += chunk.toString(); });
291
- proc.stderr.on('data', (chunk) => { stderr += chunk.toString(); });
292
-
293
- const timer = setTimeout(() => {
294
- proc.kill();
295
- resolve({
296
- stdout: '',
297
- stderr: 'Command timed out',
298
- returncode: -1,
299
- success: false,
300
- });
301
- }, timeout * 1000);
302
-
303
- proc.on('close', (code) => {
304
- clearTimeout(timer);
305
- resolve({
306
- stdout,
307
- stderr,
308
- returncode: code,
309
- success: code === 0,
310
- });
311
- });
312
-
313
- proc.on('error', (err) => {
314
- clearTimeout(timer);
315
- resolve({
316
- stdout: '',
317
- stderr: err.message,
318
- returncode: -1,
319
- success: false,
320
- });
321
- });
322
- });
323
- }
324
-
325
- // ─── Automation class ────────────────────────────────────────────────────────
326
-
327
- class Automation extends EventEmitter {
328
- constructor(options = {}) {
329
- super();
330
- this._shell = new ShellSession();
331
- this._sandboxId = options.sandboxId;
332
- this._apiRoot = options.apiRoot;
333
- this._apiKey = options.apiKey;
334
- this._logicalScreen = null; // cached {width, height} of logical screen
335
- }
336
-
337
- /**
338
- * Get the logical screen dimensions (cached).
339
- */
340
- async _getLogicalScreen() {
341
- if (this._logicalScreen) return this._logicalScreen;
342
- try {
343
- const { screen } = getNut();
344
- const w = await screen.width();
345
- const h = await screen.height();
346
- this._logicalScreen = { width: w, height: h };
347
- console.log(`[automation] Logical screen: ${w}x${h}`);
348
- } catch (err) {
349
- console.warn('[automation] Could not determine screen size, using 1366x768:', err.message);
350
- this._logicalScreen = { width: 1366, height: 768 };
351
- }
352
- return this._logicalScreen;
353
- }
354
-
355
- /**
356
- * Scale coordinates from SDK space (TD_RESOLUTION = 1366x768) to
357
- * logical mouse coordinate space. The SDK always resizes screenshots
358
- * to 1366x768 before sending to AI, so AI returns coordinates in that
359
- * space. nut-js mouse operates in logical screen coordinates.
360
- * On cloud sandboxes (1366x768 resolution), this is a no-op.
361
- */
362
- async _scaleCoords(x, y) {
363
- const SDK_WIDTH = 1366;
364
- const SDK_HEIGHT = 768;
365
- const logical = await this._getLogicalScreen();
366
- if (logical.width === SDK_WIDTH && logical.height === SDK_HEIGHT) {
367
- return { x, y };
368
- }
369
- return {
370
- x: Math.round(x * (logical.width / SDK_WIDTH)),
371
- y: Math.round(y * (logical.height / SDK_HEIGHT)),
372
- };
373
- }
374
-
375
- /**
376
- * Release all modifier keys to prevent stuck keys.
377
- * Mirrors pyautogui-cli.py's release_modifiers().
378
- */
379
- async releaseModifiers() {
380
- const { keyboard, Key } = getNut();
381
- const modifiers = [
382
- Key.LeftShift, Key.RightShift,
383
- Key.LeftControl, Key.RightControl,
384
- Key.LeftAlt, Key.RightAlt,
385
- Key.LeftSuper, Key.RightSuper,
386
- ];
387
- for (const mod of modifiers) {
388
- try {
389
- await keyboard.releaseKey(mod);
390
- } catch {}
391
- }
392
- }
393
-
394
- /**
395
- * Dispatch a command by name — same interface as pyautogui-cli.py's dispatch_command.
396
- * @param {string} command - Command name (e.g. 'click', 'screenshot', 'exec')
397
- * @param {object} data - Command data/parameters
398
- * @returns {Promise<*>} Result value (true, base64 string, object, etc.)
399
- */
400
- async dispatch(command, data = {}) {
401
- const { mouse, keyboard, screen, Button, Key, Point } = getNut();
402
- // Normalize command: strip 'commands.' or 'system.' prefix
403
- let normalizedCommand = command;
404
- if (command && command.startsWith('commands.')) {
405
- normalizedCommand = command.slice('commands.'.length);
406
- } else if (command && command.startsWith('system.')) {
407
- normalizedCommand = command.slice('system.'.length);
408
- }
409
- // Normalize camelCase to lowercase (e.g. leftClick → leftclick, mouseDown → mousedown)
410
- normalizedCommand = normalizedCommand.toLowerCase();
411
-
412
- console.log(`[automation] dispatch: "${command}" → "${normalizedCommand}", data keys: ${Object.keys(data).join(',')}`);
413
- this.emit('log', `dispatch: "${command}" → "${normalizedCommand}"`);
414
- if (normalizedCommand === 'write') {
415
- console.log(`[automation] write text: "${data.text}" (length: ${data.text?.length})`);
416
- this.emit('log', `write text: "${data.text}" (length: ${data.text?.length})`);
417
- }
418
- if (normalizedCommand === 'press') {
419
- console.log(`[automation] press keys:`, JSON.stringify(data.keys || data.key));
420
- this.emit('log', `press keys: ${JSON.stringify(data.keys || data.key)}`);
421
- }
422
-
423
- switch (normalizedCommand) {
424
- case 'move':
425
- case 'movemouse': {
426
- const mv = await this._scaleCoords(data.x, data.y);
427
- console.log(`[automation] move: raw(${data.x},${data.y}) → scaled(${mv.x},${mv.y})`);
428
- await mouse.setPosition(new Point(mv.x, mv.y));
429
- return true;
430
- }
431
-
432
- case 'moverel': {
433
- const pos = await mouse.getPosition();
434
- const dr = await this._scaleCoords(data.x, data.y);
435
- await mouse.setPosition(new Point(pos.x + dr.x, pos.y + dr.y));
436
- return true;
437
- }
438
-
439
- case 'click':
440
- case 'leftclick': {
441
- const btn = this._mapButton(data.button || 'left');
442
- if (data.x != null && data.y != null) {
443
- const sc = await this._scaleCoords(data.x, data.y);
444
- console.log(`[automation] click: raw(${data.x},${data.y}) → scaled(${sc.x},${sc.y})`);
445
- await mouse.setPosition(new Point(sc.x, sc.y));
446
- }
447
- await mouse.click(btn);
448
- return true;
449
- }
450
-
451
- case 'rightclick': {
452
- if (data.x != null && data.y != null) {
453
- const sc = await this._scaleCoords(data.x, data.y);
454
- await mouse.setPosition(new Point(sc.x, sc.y));
455
- }
456
- await mouse.click(Button.RIGHT);
457
- return true;
458
- }
459
-
460
- case 'middleclick': {
461
- if (data.x != null && data.y != null) {
462
- const sc = await this._scaleCoords(data.x, data.y);
463
- await mouse.setPosition(new Point(sc.x, sc.y));
464
- }
465
- await mouse.click(Button.MIDDLE);
466
- return true;
467
- }
468
-
469
- case 'doubleclick': {
470
- if (data.x != null && data.y != null) {
471
- const sc = await this._scaleCoords(data.x, data.y);
472
- await mouse.setPosition(new Point(sc.x, sc.y));
473
- }
474
- await mouse.doubleClick(Button.LEFT);
475
- return true;
476
- }
477
-
478
- case 'tripleclick': {
479
- if (data.x != null && data.y != null) {
480
- const sc = await this._scaleCoords(data.x, data.y);
481
- await mouse.setPosition(new Point(sc.x, sc.y));
482
- }
483
- // nut-js doesn't have tripleClick — simulate with 3 rapid clicks
484
- await mouse.click(Button.LEFT);
485
- await mouse.click(Button.LEFT);
486
- await mouse.click(Button.LEFT);
487
- return true;
488
- }
489
-
490
- case 'mousedown':
491
- case 'mousepress': {
492
- const btn = this._mapButton(data.button || 'left');
493
- if (data.x != null && data.y != null) {
494
- const sc = await this._scaleCoords(data.x, data.y);
495
- await mouse.setPosition(new Point(sc.x, sc.y));
496
- }
497
- await mouse.pressButton(btn);
498
- return true;
499
- }
500
-
501
- case 'mouseup':
502
- case 'mouserelease': {
503
- const btn = this._mapButton(data.button || 'left');
504
- if (data.x != null && data.y != null) {
505
- const sc = await this._scaleCoords(data.x, data.y);
506
- await mouse.setPosition(new Point(sc.x, sc.y));
507
- }
508
- await mouse.releaseButton(btn);
509
- return true;
510
- }
511
-
512
- case 'scroll': {
513
- const amount = data.amount || 0;
514
- // nut-js scrollDown/scrollUp take positive values
515
- if (amount > 0) {
516
- await mouse.scrollUp(Math.abs(amount));
517
- } else if (amount < 0) {
518
- await mouse.scrollDown(Math.abs(amount));
519
- }
520
- return true;
521
- }
522
-
523
- case 'write': {
524
- // Type each character — matches pyautogui.write() behavior (types literal characters)
525
- await keyboard.type(data.text);
526
- await this.releaseModifiers();
527
- return true;
528
- }
529
-
530
- case 'press': {
531
- // SDK sends { keys: [...] } (array) or { key: 'x' } (singular)
532
- if (data.keys && Array.isArray(data.keys)) {
533
- // Multiple keys = hotkey/combo
534
- const keys = data.keys.map(k => mapKey(k));
535
- for (const key of keys) {
536
- await keyboard.pressKey(key);
537
- }
538
- for (const key of keys.reverse()) {
539
- await keyboard.releaseKey(key);
540
- }
541
- } else {
542
- const key = mapKey(data.key || data.keys);
543
- await keyboard.pressKey(key);
544
- await keyboard.releaseKey(key);
545
- }
546
- await this.releaseModifiers();
547
- return true;
548
- }
549
-
550
- case 'hotkey': {
551
- const keys = data.keys.map(k => mapKey(k));
552
- // Press all keys down, then release in reverse order
553
- for (const key of keys) {
554
- await keyboard.pressKey(key);
555
- }
556
- for (const key of keys.reverse()) {
557
- await keyboard.releaseKey(key);
558
- }
559
- await this.releaseModifiers();
560
- return true;
561
- }
562
-
563
- case 'keydown': {
564
- const key = mapKey(data.key);
565
- await keyboard.pressKey(key);
566
- return true;
567
- }
568
-
569
- case 'keyup': {
570
- const key = mapKey(data.key);
571
- await keyboard.releaseKey(key);
572
- return true;
573
- }
574
-
575
- case 'get-mouse-position': {
576
- const pos = await mouse.getPosition();
577
- return { x: pos.x, y: pos.y };
578
- }
579
-
580
- case 'get-screen-size': {
581
- const region = await screen.width();
582
- const height = await screen.height();
583
- return { width: region, height };
584
- }
585
-
586
- case 'alert': {
587
- // No direct equivalent in Node — execute via shell
588
- if (IS_LINUX) {
589
- await runScript(['zenity', '--info', '--text', data.text], 10);
590
- } else if (IS_MACOS) {
591
- await runScript(['osascript', '-e', `display dialog "${data.text}"`], 10);
592
- } else {
593
- await runScript(['powershell', '-Command', `Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.MessageBox]::Show("${data.text}")`], 10);
594
- }
595
- return true;
596
- }
597
-
598
- case 'focus-window': {
599
- return await this._focusWindow(data);
600
- }
601
-
602
- case 'focus-application': {
603
- return await this._focusWindow({
604
- ...data,
605
- title: data.title || data.name || data.appName || '',
606
- });
607
- }
608
-
609
- case 'get-active-window': {
610
- return await this._getActiveWindow(data);
611
- }
612
-
613
- case 'network': {
614
- return await this._getNetwork(data);
615
- }
616
-
617
- case 'exec': {
618
- const timeout = data.timeout || 30;
619
- return await this._shell.run(data.command, timeout);
620
- }
621
-
622
- // SDK sends 'commands.run' for exec — map to shell command
623
- case 'run': {
624
- const timeout = Math.ceil((data.timeout || 300000) / 1000); // ms to seconds
625
- const requestId = data.requestId;
626
- try {
627
- const result = await this._shell.run(data.command, timeout, {
628
- onChunk: (text) => {
629
- // Emit streaming chunk so AblyService can forward to SDK
630
- this.emit('exec.output', { requestId, chunk: text });
631
- },
632
- });
633
- // SDK expects { out: { returncode, stdout, stderr } }
634
- // ShellSession.run() already returns { stdout, stderr, returncode, success }
635
- return {
636
- out: {
637
- returncode: result.returncode,
638
- stdout: result.stdout,
639
- stderr: result.stderr,
640
- },
641
- };
642
- } catch (err) {
643
- return {
644
- out: {
645
- returncode: 1,
646
- stdout: '',
647
- stderr: err.message,
648
- },
649
- };
650
- }
651
- }
652
-
653
- case 'extract':
654
- case 'remember':
655
- case 'screenshot': {
656
- console.log(`[automation] Handling ${normalizedCommand} command - will capture and upload to S3`);
657
- // For extract/remember: capture screenshot, upload to S3, return s3Key
658
- // SDK will then call API with the s3Key
659
- // For system.screenshot (normalized to 'screenshot'): same flow (return s3Key instead of base64)
660
-
661
- // Capture screenshot locally
662
- console.log('[automation] Step 1: Capturing screenshot...');
663
- const screenshot = await this._captureScreenshot();
664
- console.log(`[automation] Step 2: Screenshot captured, size: ${screenshot.length} bytes (base64)`);
665
- const buffer = Buffer.from(screenshot, 'base64');
666
- console.log(`[automation] Step 3: Buffer created, size: ${buffer.length} bytes`);
667
-
668
- // Upload screenshot to S3 and return key
669
- console.log('[automation] Step 4: Uploading to S3...');
670
- const s3Key = await this._uploadToS3(buffer, this._sandboxId, 'image/png');
671
- console.log(`[automation] Step 5: Upload complete, s3Key: ${s3Key}`);
672
-
673
- return { s3Key };
674
- }
675
-
676
- case 'ping':
677
- return 'pong';
678
-
679
- default:
680
- throw new Error(`Unknown command: ${command}`);
681
- }
682
- }
683
-
684
- // ── Button mapping ─────────────────────────────────────────────────
685
-
686
- _mapButton(button) {
687
- const { Button } = getNut();
688
- switch ((button || 'left').toLowerCase()) {
689
- case 'left': return Button.LEFT;
690
- case 'right': return Button.RIGHT;
691
- case 'middle': return Button.MIDDLE;
692
- default: return Button.LEFT;
693
- }
694
- }
695
-
696
- // ── S3 Upload (for large screenshots that exceed Ably limits) ──────
697
-
698
- async _uploadToS3(buffer, sandboxId, contentType = 'image/jpeg') {
699
- if (!API_KEY || !sandboxId) {
700
- throw new Error('API_KEY and sandboxId required for S3 upload');
701
- }
702
-
703
- // Get presigned URL from API (30s timeout)
704
- const response = await fetch(`${API_ROOT}/api/v7/runner/upload-url`, {
705
- method: 'POST',
706
- headers: { 'Content-Type': 'application/json' },
707
- body: JSON.stringify({
708
- apiKey: API_KEY,
709
- sandboxId,
710
- contentType,
711
- }),
712
- signal: AbortSignal.timeout(30000),
713
- });
714
-
715
- if (!response.ok) {
716
- const text = await response.text();
717
- throw new Error(`Failed to get upload URL: ${response.status} ${text}`);
718
- }
719
-
720
- const { uploadUrl, s3Key } = await response.json();
721
- console.log(`[automation] Got S3 upload URL, s3Key: ${s3Key}`);
722
-
723
- // Upload to S3 (30s timeout)
724
- const uploadResponse = await fetch(uploadUrl, {
725
- method: 'PUT',
726
- headers: { 'Content-Type': contentType },
727
- body: buffer,
728
- signal: AbortSignal.timeout(30000),
729
- });
730
-
731
- if (!uploadResponse.ok) {
732
- console.error(`[automation] S3 upload failed: status=${uploadResponse.status}`);
733
- throw new Error(`S3 upload failed: ${uploadResponse.status}`);
734
- }
735
-
736
- console.log(`[automation] S3 upload successful, returning s3Key: ${s3Key}`);
737
- return s3Key;
738
- }
739
-
740
- // ── Screenshot (highest quality PNG, uploaded via S3) ──────────────
741
-
742
- async _captureScreenshot() {
743
- const { screen } = getNut();
744
- const sharp = require('sharp');
745
-
746
- // Capture screen via nut-js with timeout — screen.grab() can hang if display is unavailable
747
- const image = await Promise.race([
748
- screen.grab(),
749
- new Promise((_, reject) =>
750
- setTimeout(() => reject(new Error(
751
- 'Screenshot capture timed out after 15s — display may be unavailable'
752
- )), 15000)
753
- ),
754
- ]);
755
-
756
- // image.data is raw RGBA pixel buffer, image.width/height are physical pixels
757
- const physicalWidth = image.width;
758
- const physicalHeight = image.height;
759
-
760
- // Get logical screen size for Retina handling
761
- const logicalWidth = await screen.width();
762
- const logicalHeight = await screen.height();
763
-
764
- // Create sharp pipeline
765
- let pipeline = sharp(Buffer.from(image.data), {
766
- raw: {
767
- width: physicalWidth,
768
- height: physicalHeight,
769
- channels: 4,
770
- },
771
- });
772
-
773
- // Resize from physical to logical pixels if needed (Retina displays)
774
- if (physicalWidth !== logicalWidth || physicalHeight !== logicalHeight) {
775
- pipeline = pipeline.resize(logicalWidth, logicalHeight);
776
- }
777
-
778
- // Encode as lossless PNG with no compression
779
- const buffer = await pipeline.png({ compressionLevel: 0 }).toBuffer();
780
-
781
- return buffer.toString('base64');
782
- }
783
-
784
- // ── Focus window (platform-specific) ───────────────────────────────
785
-
786
- async _focusWindow(data) {
787
- const title = data.title || '';
788
- const timeout = data.timeout || 30;
789
-
790
- if (IS_WINDOWS) {
791
- const scriptPath = path.join(__dirname, '..', 'focusWindow.ps1');
792
- const action = data.action || 'Focus';
793
- return await runScript(
794
- ['powershell', '-ExecutionPolicy', 'Bypass', '-File', scriptPath, title, action],
795
- timeout,
796
- );
797
- } else {
798
- // Linux: try jumpapp first, then xdotool fallback
799
- let result = await runScript(['jumpapp', '-f', title], Math.min(timeout, 10));
800
- if (!result.success) {
801
- result = await runScript(
802
- ['xdotool', 'search', '--name', title, 'windowactivate'],
803
- Math.min(timeout, 10),
804
- );
805
- }
806
- return result;
807
- }
808
- }
809
-
810
- // ── Get active window (platform-specific) ──────────────────────────
811
-
812
- async _getActiveWindow(data) {
813
- const timeout = data.timeout || 10;
814
-
815
- if (IS_WINDOWS) {
816
- const scriptPath = path.join(__dirname, '..', 'getActiveWindow.ps1');
817
- const result = await runScript(
818
- ['powershell', '-ExecutionPolicy', 'Bypass', '-File', scriptPath],
819
- timeout,
820
- );
821
- if (result.success) {
822
- try {
823
- return JSON.parse(result.stdout.trim());
824
- } catch {
825
- return result;
826
- }
827
- }
828
- return result;
829
- } else {
830
- // Linux: xdotool
831
- const result = await runScript(
832
- ['bash', '-c', "xdotool getactivewindow getwindowname 2>/dev/null || echo ''"],
833
- timeout,
834
- );
835
- return {
836
- name: result.success ? result.stdout.trim() : '',
837
- success: result.success,
838
- };
839
- }
840
- }
841
-
842
- // ── Network info (platform-specific) ───────────────────────────────
843
-
844
- async _getNetwork(data) {
845
- const timeout = data.timeout || 10;
846
-
847
- if (IS_WINDOWS) {
848
- const scriptPath = path.join(__dirname, '..', 'network.ps1');
849
- const result = await runScript(
850
- ['powershell', '-ExecutionPolicy', 'Bypass', '-File', scriptPath],
851
- timeout,
852
- );
853
- if (result.success) {
854
- try {
855
- return JSON.parse(result.stdout.trim());
856
- } catch {
857
- return result;
858
- }
859
- }
860
- return result;
861
- } else {
862
- const result = await runScript(
863
- ['bash', '-c', "ip -j addr show 2>/dev/null || ifconfig -a 2>/dev/null || echo '{}'"],
864
- timeout,
865
- );
866
- return {
867
- stdout: result.stdout,
868
- success: result.success,
869
- };
870
- }
871
- }
872
-
873
- // NOTE: _captureScreenshot is defined above (with Retina handling)
874
-
875
- // ── Cleanup ────────────────────────────────────────────────────────
876
-
877
- cleanup() {
878
- this._shell.cleanup();
879
- }
880
- }
881
-
882
- module.exports = { Automation };