@testdriverai/runner 7.8.0-canary.14 → 7.8.0-canary.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +3 -3
- package/lib/ably-service.js +53 -15
- package/lib/automation.js +62 -25
- package/package.json +4 -1
- package/sandbox-agent.js +2 -2
- package/scripts-desktop/start-agent.sh +105 -0
- package/scripts-desktop/start-desktop.sh +147 -4
- package/lib/automation.js.bak +0 -882
package/index.js
CHANGED
|
@@ -280,9 +280,9 @@ class PresenceRunner {
|
|
|
280
280
|
await new Promise((resolve, reject) => {
|
|
281
281
|
this.ably.connection.on('connected', resolve);
|
|
282
282
|
this.ably.connection.on('failed', (err) => {
|
|
283
|
-
reject(new Error(`
|
|
283
|
+
reject(new Error(`Realtime connection failed: ${err?.reason?.message || 'unknown'}`));
|
|
284
284
|
});
|
|
285
|
-
setTimeout(() => reject(new Error('
|
|
285
|
+
setTimeout(() => reject(new Error('Realtime connection timeout')), 30000);
|
|
286
286
|
});
|
|
287
287
|
|
|
288
288
|
log('Connected to Ably');
|
|
@@ -291,7 +291,7 @@ class PresenceRunner {
|
|
|
291
291
|
this.ably.connection.on((stateChange) => {
|
|
292
292
|
const { current, previous, reason, retryIn } = stateChange;
|
|
293
293
|
const reasonMsg = reason ? (reason.message || reason.code || String(reason)) : undefined;
|
|
294
|
-
log(`[
|
|
294
|
+
log(`[realtime] Presence connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}${retryIn ? ' (retryIn=' + retryIn + 'ms)' : ''}`);
|
|
295
295
|
});
|
|
296
296
|
|
|
297
297
|
// Get runner channel and enter presence
|
package/lib/ably-service.js
CHANGED
|
@@ -201,10 +201,10 @@ class AblyService extends EventEmitter {
|
|
|
201
201
|
resolve();
|
|
202
202
|
});
|
|
203
203
|
this._ably.connection.on('failed', () => {
|
|
204
|
-
reject(new Error('
|
|
204
|
+
reject(new Error('Realtime connection failed'));
|
|
205
205
|
});
|
|
206
206
|
setTimeout(() => {
|
|
207
|
-
reject(new Error('
|
|
207
|
+
reject(new Error('Realtime connection timeout (30s)'));
|
|
208
208
|
}, 30000);
|
|
209
209
|
});
|
|
210
210
|
|
|
@@ -275,6 +275,9 @@ class AblyService extends EventEmitter {
|
|
|
275
275
|
|
|
276
276
|
this.emit('log', `Command received: ${type} (requestId=${requestId})`);
|
|
277
277
|
|
|
278
|
+
// Stop re-publishing runner.ready once we get the first command
|
|
279
|
+
this._stopReadySignal();
|
|
280
|
+
|
|
278
281
|
// Per-command timeout: use message.timeout if provided, else default 120s
|
|
279
282
|
// Prevents hanging forever if screenshot capture or S3 upload stalls
|
|
280
283
|
const commandTimeout = (message.timeout && message.timeout > 0)
|
|
@@ -331,7 +334,7 @@ class AblyService extends EventEmitter {
|
|
|
331
334
|
};
|
|
332
335
|
this._commandSubscription = await this._sessionChannel.subscribe('command', this._onCommandMsg);
|
|
333
336
|
|
|
334
|
-
// ───
|
|
337
|
+
// ─── Realtime connection state monitoring → Sentry ─────────────────────────
|
|
335
338
|
this._ably.connection.on((stateChange) => {
|
|
336
339
|
const { current, previous, reason, retryIn } = stateChange;
|
|
337
340
|
const reasonMsg = reason ? (reason.message || reason.code || String(reason)) : undefined;
|
|
@@ -346,28 +349,28 @@ class AblyService extends EventEmitter {
|
|
|
346
349
|
// Preserve original behavior
|
|
347
350
|
if (current === 'disconnected') {
|
|
348
351
|
this._connected = false;
|
|
349
|
-
this.emit('log', `
|
|
352
|
+
this.emit('log', `Realtime connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}${retryIn ? ' (retryIn=' + retryIn + 'ms)' : ''}`);
|
|
350
353
|
this.emit('log', 'Ably disconnected — will auto-reconnect');
|
|
351
354
|
} else if (current === 'connected' && previous !== 'initialized') {
|
|
352
355
|
if (!this._connected) {
|
|
353
356
|
this._connected = true;
|
|
354
|
-
this.emit('log', `
|
|
357
|
+
this.emit('log', `Realtime connection: ${previous} → ${current}`);
|
|
355
358
|
this.emit('log', 'Ably reconnected');
|
|
356
359
|
}
|
|
357
360
|
} else if (current === 'failed') {
|
|
358
361
|
this._connected = false;
|
|
359
|
-
this.emit('log', `
|
|
360
|
-
this.emit('error', new Error('
|
|
362
|
+
this.emit('log', `Realtime connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}`);
|
|
363
|
+
this.emit('error', new Error('Realtime connection failed'));
|
|
361
364
|
} else if (current === 'suspended') {
|
|
362
365
|
this._connected = false;
|
|
363
|
-
this.emit('log', `
|
|
366
|
+
this.emit('log', `Realtime connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}`);
|
|
364
367
|
this.emit('log', 'Ably suspended — connection lost for extended period, will keep retrying');
|
|
365
368
|
} else if (current === 'closed') {
|
|
366
369
|
this._connected = false;
|
|
367
|
-
this.emit('log', `
|
|
370
|
+
this.emit('log', `Realtime connection: ${previous} → ${current}`);
|
|
368
371
|
this.emit('disconnected');
|
|
369
372
|
} else {
|
|
370
|
-
this.emit('log', `
|
|
373
|
+
this.emit('log', `Realtime connection: ${previous} → ${current}${reasonMsg ? ' — ' + reasonMsg : ''}`);
|
|
371
374
|
}
|
|
372
375
|
|
|
373
376
|
// Capture exceptions for bad states
|
|
@@ -377,7 +380,7 @@ class AblyService extends EventEmitter {
|
|
|
377
380
|
scope.setTag('ably.state', current);
|
|
378
381
|
scope.setTag('sandbox.id', this._sandboxId);
|
|
379
382
|
scope.setContext('ably_connection', { from: previous, to: current, reason: reasonMsg, retryIn });
|
|
380
|
-
const err = reason instanceof Error ? reason : new Error('
|
|
383
|
+
const err = reason instanceof Error ? reason : new Error('Realtime connection state error');
|
|
381
384
|
err.name = 'AblyConnectionError';
|
|
382
385
|
Sentry.captureException(err);
|
|
383
386
|
});
|
|
@@ -415,8 +418,8 @@ class AblyService extends EventEmitter {
|
|
|
415
418
|
|
|
416
419
|
// Detect discontinuity: channel re-attached but message continuity was lost.
|
|
417
420
|
// Use historyBeforeSubscribe() on each subscription to recover missed messages.
|
|
418
|
-
if (current === 'attached' && stateChange.resumed === false && previous) {
|
|
419
|
-
this.emit('log', `Ably channel [session]: DISCONTINUITY (resumed=false)${reasonMsg ? ' — ' + reasonMsg : ''}`);
|
|
421
|
+
if (current === 'attached' && stateChange.resumed === false && previous === 'attached') {
|
|
422
|
+
this.emit('log', `Ably channel [session]: DISCONTINUITY (resumed=false)${reasonMsg ? ' — ' + reasonMsg : ''}`);
|
|
420
423
|
|
|
421
424
|
Sentry.withScope((scope) => {
|
|
422
425
|
scope.setTag('ably.client', 'runner');
|
|
@@ -459,7 +462,7 @@ class AblyService extends EventEmitter {
|
|
|
459
462
|
// Signal readiness to SDK — commands sent before this would be lost
|
|
460
463
|
const readyPayload = {
|
|
461
464
|
type: 'runner.ready',
|
|
462
|
-
os: 'windows',
|
|
465
|
+
os: process.platform === 'win32' ? 'windows' : 'linux',
|
|
463
466
|
sandboxId: this._sandboxId,
|
|
464
467
|
runnerVersion: getLocalVersion() || 'unknown',
|
|
465
468
|
timestamp: Date.now(),
|
|
@@ -473,6 +476,39 @@ class AblyService extends EventEmitter {
|
|
|
473
476
|
}
|
|
474
477
|
await this._sessionChannel.publish('control', readyPayload);
|
|
475
478
|
this.emit('log', 'Published runner.ready signal');
|
|
479
|
+
|
|
480
|
+
// Re-publish runner.ready every 3s for up to 60s.
|
|
481
|
+
// The SDK may connect after the first publish (race condition),
|
|
482
|
+
// and Ably channel history may not be enabled. Repeating ensures
|
|
483
|
+
// the SDK catches at least one live runner.ready message.
|
|
484
|
+
this._readyInterval = setInterval(async () => {
|
|
485
|
+
try {
|
|
486
|
+
readyPayload.timestamp = Date.now();
|
|
487
|
+
await this._sessionChannel.publish('control', readyPayload);
|
|
488
|
+
this.emit('log', 'Re-published runner.ready signal');
|
|
489
|
+
} catch (err) {
|
|
490
|
+
this.emit('log', `Failed to re-publish runner.ready: ${err.message}`);
|
|
491
|
+
}
|
|
492
|
+
}, 3000);
|
|
493
|
+
|
|
494
|
+
// Stop after 60s regardless
|
|
495
|
+
this._readyTimeout = setTimeout(() => {
|
|
496
|
+
this._stopReadySignal();
|
|
497
|
+
}, 60000);
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
/**
|
|
501
|
+
* Stop the repeated runner.ready signal (called on first command or after timeout).
|
|
502
|
+
*/
|
|
503
|
+
_stopReadySignal() {
|
|
504
|
+
if (this._readyInterval) {
|
|
505
|
+
clearInterval(this._readyInterval);
|
|
506
|
+
this._readyInterval = null;
|
|
507
|
+
}
|
|
508
|
+
if (this._readyTimeout) {
|
|
509
|
+
clearTimeout(this._readyTimeout);
|
|
510
|
+
this._readyTimeout = null;
|
|
511
|
+
}
|
|
476
512
|
}
|
|
477
513
|
|
|
478
514
|
/**
|
|
@@ -615,7 +651,9 @@ class AblyService extends EventEmitter {
|
|
|
615
651
|
* Disconnect from Ably and clean up.
|
|
616
652
|
*/
|
|
617
653
|
async close() {
|
|
618
|
-
this.emit('log', 'Closing
|
|
654
|
+
this.emit('log', 'Closing realtime service...');
|
|
655
|
+
|
|
656
|
+
this._stopReadySignal();
|
|
619
657
|
|
|
620
658
|
if (this._statsInterval) {
|
|
621
659
|
clearInterval(this._statsInterval);
|
package/lib/automation.js
CHANGED
|
@@ -45,8 +45,10 @@ const API_KEY = process.env.TD_API_KEY;
|
|
|
45
45
|
// shell injection and escaping issues.
|
|
46
46
|
|
|
47
47
|
const PYTHON = IS_WINDOWS ? 'python' : 'python3';
|
|
48
|
+
// On Linux, ensure DISPLAY is set (use env var or fallback to :0)
|
|
49
|
+
// os.environ.setdefault() keeps any DISPLAY inherited from the parent (e.g. E2B's :1 display)
|
|
48
50
|
const PY_IMPORT = IS_LINUX
|
|
49
|
-
? "import os; os.environ
|
|
51
|
+
? "import os; os.environ.setdefault('DISPLAY', ':0'); import pyautogui, sys; pyautogui.FAILSAFE = False; "
|
|
50
52
|
: 'import pyautogui, sys; pyautogui.FAILSAFE = False; ';
|
|
51
53
|
|
|
52
54
|
/**
|
|
@@ -660,33 +662,68 @@ class Automation extends EventEmitter {
|
|
|
660
662
|
|
|
661
663
|
async _captureScreenshot() {
|
|
662
664
|
const sharp = require('sharp');
|
|
663
|
-
const
|
|
665
|
+
const maxAttempts = 3;
|
|
664
666
|
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
// Python handles Retina downscale: if physical size differs from logical,
|
|
668
|
-
// the image is resized to logical dimensions before saving.
|
|
669
|
-
await runPyAutoGUI(
|
|
670
|
-
'img = pyautogui.screenshot()\n' +
|
|
671
|
-
'logical = pyautogui.size()\n' +
|
|
672
|
-
'if img.size[0] != logical[0] or img.size[1] != logical[1]:\n' +
|
|
673
|
-
' from PIL import Image\n' +
|
|
674
|
-
' img = img.resize((logical[0], logical[1]), Image.LANCZOS)\n' +
|
|
675
|
-
'img.save(sys.argv[1], format="PNG")',
|
|
676
|
-
[tmpFile],
|
|
677
|
-
20000
|
|
678
|
-
);
|
|
667
|
+
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
|
668
|
+
const tmpFile = path.join(os.tmpdir(), `td_screenshot_${Date.now()}.png`);
|
|
679
669
|
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
670
|
+
try {
|
|
671
|
+
// Capture screenshot via pyautogui → saves to temp file
|
|
672
|
+
// Python handles Retina downscale: if physical size differs from logical,
|
|
673
|
+
// the image is resized to logical dimensions before saving.
|
|
674
|
+
await runPyAutoGUI(
|
|
675
|
+
'img = pyautogui.screenshot()\n' +
|
|
676
|
+
'logical = pyautogui.size()\n' +
|
|
677
|
+
'if img.size[0] != logical[0] or img.size[1] != logical[1]:\n' +
|
|
678
|
+
' from PIL import Image\n' +
|
|
679
|
+
' img = img.resize((logical[0], logical[1]), Image.LANCZOS)\n' +
|
|
680
|
+
'img.save(sys.argv[1], format="PNG")',
|
|
681
|
+
[tmpFile],
|
|
682
|
+
20000
|
|
683
|
+
);
|
|
685
684
|
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
685
|
+
// Read the PNG and re-encode with sharp (lossless, no compression)
|
|
686
|
+
const pngBuffer = fs.readFileSync(tmpFile);
|
|
687
|
+
const image = sharp(pngBuffer);
|
|
688
|
+
|
|
689
|
+
// Detect all-black screenshots (Xvfb/compositor issue)
|
|
690
|
+
if (IS_LINUX) {
|
|
691
|
+
const { channels } = await image.stats();
|
|
692
|
+
// channels[0..2] = R, G, B — check if max pixel value across all channels is near-zero
|
|
693
|
+
const maxPixel = Math.max(
|
|
694
|
+
channels[0]?.max ?? 0,
|
|
695
|
+
channels[1]?.max ?? 0,
|
|
696
|
+
channels[2]?.max ?? 0
|
|
697
|
+
);
|
|
698
|
+
if (maxPixel <= 1) {
|
|
699
|
+
console.warn(`[automation] Screenshot attempt ${attempt}/${maxAttempts}: image is all black (max pixel=${maxPixel})`);
|
|
700
|
+
if (attempt < maxAttempts) {
|
|
701
|
+
// Try to heal: poke the display to trigger a redraw
|
|
702
|
+
try {
|
|
703
|
+
await runPyAutoGUI(
|
|
704
|
+
"import subprocess; " +
|
|
705
|
+
"subprocess.run(['xdotool', 'key', '--clearmodifiers', 'super'], timeout=5); " +
|
|
706
|
+
"subprocess.run(['xset', 's', 'off'], timeout=5); " +
|
|
707
|
+
"subprocess.run(['xset', 's', 'noblank'], timeout=5); " +
|
|
708
|
+
"subprocess.run(['xset', '-dpms'], timeout=5)",
|
|
709
|
+
[],
|
|
710
|
+
10000
|
|
711
|
+
);
|
|
712
|
+
} catch {}
|
|
713
|
+
// Wait for display to recover
|
|
714
|
+
await new Promise(r => setTimeout(r, 2000));
|
|
715
|
+
continue;
|
|
716
|
+
}
|
|
717
|
+
console.error('[automation] All screenshot attempts returned black — display may be broken');
|
|
718
|
+
}
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
const buffer = await image.png({ compressionLevel: 0 }).toBuffer();
|
|
722
|
+
return buffer.toString('base64');
|
|
723
|
+
} finally {
|
|
724
|
+
// Clean up temp file
|
|
725
|
+
try { fs.unlinkSync(tmpFile); } catch {}
|
|
726
|
+
}
|
|
690
727
|
}
|
|
691
728
|
}
|
|
692
729
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@testdriverai/runner",
|
|
3
|
-
"version": "7.8.0-canary.
|
|
3
|
+
"version": "7.8.0-canary.16",
|
|
4
4
|
"description": "TestDriver Runner - Ably-based remote automation agent with Node.js automation",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"bin": {
|
|
@@ -37,6 +37,9 @@
|
|
|
37
37
|
"sharp": "^0.33.0",
|
|
38
38
|
"uuid": "^9.0.0"
|
|
39
39
|
},
|
|
40
|
+
"publishConfig": {
|
|
41
|
+
"access": "public"
|
|
42
|
+
},
|
|
40
43
|
"devDependencies": {
|
|
41
44
|
"e2b": "^2.12.1"
|
|
42
45
|
}
|
package/sandbox-agent.js
CHANGED
|
@@ -236,8 +236,8 @@ async function main() {
|
|
|
236
236
|
updateInfo: null, // sandbox-agent doesn't do self-update checks
|
|
237
237
|
});
|
|
238
238
|
|
|
239
|
-
ablyService.on('log', (msg) => log(`[
|
|
240
|
-
ablyService.on('error', (err) => log(`[
|
|
239
|
+
ablyService.on('log', (msg) => log(`[realtime] ${msg}`));
|
|
240
|
+
ablyService.on('error', (err) => log(`[realtime] ERROR: ${err.message}`));
|
|
241
241
|
|
|
242
242
|
await ablyService.connect();
|
|
243
243
|
log('Agent ready — listening for commands via Ably');
|
|
package/scripts-desktop/start-agent.sh
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# ─── TestDriver Sandbox Agent Startup ────────────────────────────────────────
|
|
3
|
+
# Starts the sandbox-agent.js (Ably-based automation agent) inside the E2B
|
|
4
|
+
# sandbox. This script is called by the API after writing the config file
|
|
5
|
+
# to /tmp/testdriver-agent.json.
|
|
6
|
+
#
|
|
7
|
+
# This matches the Windows runner pattern: the agent runs locally on the
|
|
8
|
+
# sandbox and executes commands via pyautogui (instead of @e2b/desktop RPC).
|
|
9
|
+
#
|
|
10
|
+
# Usage: bash /opt/testdriver-runner/scripts-desktop/start-agent.sh [&]
|
|
11
|
+
#
|
|
12
|
+
# Prerequisites:
|
|
13
|
+
# - Desktop environment running (start-desktop.sh completed)
|
|
14
|
+
# - Config file at /tmp/testdriver-agent.json with Ably credentials
|
|
15
|
+
# - Node.js installed
|
|
16
|
+
# - Runner installed at /opt/testdriver-runner
|
|
17
|
+
|
|
18
|
+
set -e
|
|
19
|
+
|
|
20
|
+
export DISPLAY="${DISPLAY:-:0}"
|
|
21
|
+
export XAUTHORITY="${XAUTHORITY:-${HOME}/.Xauthority}"
|
|
22
|
+
|
|
23
|
+
RUNNER_DIR="/opt/testdriver-runner"
|
|
24
|
+
CONFIG_PATH="/tmp/testdriver-agent.json"
|
|
25
|
+
LOG_FILE="/tmp/sandbox-agent.log"
|
|
26
|
+
PID_FILE="/tmp/sandbox-agent.pid"
|
|
27
|
+
|
|
28
|
+
log() {
|
|
29
|
+
echo "[$(date -Iseconds)] [start-agent] $1" | tee -a "$LOG_FILE"
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
# ─── Check if already running ─────────────────────────────────────────────────
|
|
33
|
+
if [ -f "$PID_FILE" ]; then
|
|
34
|
+
existing_pid=$(cat "$PID_FILE")
|
|
35
|
+
if kill -0 "$existing_pid" 2>/dev/null; then
|
|
36
|
+
log "Agent already running (PID: $existing_pid), exiting"
|
|
37
|
+
exit 0
|
|
38
|
+
else
|
|
39
|
+
log "Stale PID file found, removing"
|
|
40
|
+
rm -f "$PID_FILE"
|
|
41
|
+
fi
|
|
42
|
+
fi
|
|
43
|
+
|
|
44
|
+
# ─── Verify prerequisites ─────────────────────────────────────────────────────
|
|
45
|
+
if [ ! -d "$RUNNER_DIR" ]; then
|
|
46
|
+
log "ERROR: Runner not found at $RUNNER_DIR"
|
|
47
|
+
exit 1
|
|
48
|
+
fi
|
|
49
|
+
|
|
50
|
+
if [ ! -f "$RUNNER_DIR/sandbox-agent.js" ]; then
|
|
51
|
+
log "ERROR: sandbox-agent.js not found in $RUNNER_DIR"
|
|
52
|
+
exit 1
|
|
53
|
+
fi
|
|
54
|
+
|
|
55
|
+
if ! command -v node &> /dev/null; then
|
|
56
|
+
log "ERROR: Node.js not installed"
|
|
57
|
+
exit 1
|
|
58
|
+
fi
|
|
59
|
+
|
|
60
|
+
# ─── Wait for config file (with timeout) ─────────────────────────────────────
|
|
61
|
+
# The API writes the config file before calling this script, but we add a
|
|
62
|
+
# brief wait just in case there's any race condition.
|
|
63
|
+
WAIT_TIMEOUT=30
|
|
64
|
+
WAIT_INTERVAL=1
|
|
65
|
+
elapsed=0
|
|
66
|
+
|
|
67
|
+
log "Waiting for config file: $CONFIG_PATH"
|
|
68
|
+
while [ ! -f "$CONFIG_PATH" ] && [ $elapsed -lt $WAIT_TIMEOUT ]; do
|
|
69
|
+
sleep $WAIT_INTERVAL
|
|
70
|
+
elapsed=$((elapsed + WAIT_INTERVAL))
|
|
71
|
+
done
|
|
72
|
+
|
|
73
|
+
if [ ! -f "$CONFIG_PATH" ]; then
|
|
74
|
+
log "ERROR: Config file not found after ${WAIT_TIMEOUT}s: $CONFIG_PATH"
|
|
75
|
+
exit 1
|
|
76
|
+
fi
|
|
77
|
+
|
|
78
|
+
log "Config file found"
|
|
79
|
+
|
|
80
|
+
# ─── Start the agent ──────────────────────────────────────────────────────────
|
|
81
|
+
log "Starting sandbox-agent.js..."
|
|
82
|
+
log "DISPLAY=$DISPLAY, RUNNER_DIR=$RUNNER_DIR"
|
|
83
|
+
|
|
84
|
+
# Run in background, redirect output to log file
|
|
85
|
+
cd "$RUNNER_DIR"
|
|
86
|
+
nohup node sandbox-agent.js >> "$LOG_FILE" 2>&1 &
|
|
87
|
+
AGENT_PID=$!
|
|
88
|
+
|
|
89
|
+
# Write PID file for process management
|
|
90
|
+
echo "$AGENT_PID" > "$PID_FILE"
|
|
91
|
+
|
|
92
|
+
log "Agent started (PID: $AGENT_PID)"
|
|
93
|
+
log "Log file: $LOG_FILE"
|
|
94
|
+
|
|
95
|
+
# Brief pause to catch any immediate startup errors
|
|
96
|
+
sleep 2
|
|
97
|
+
|
|
98
|
+
if kill -0 "$AGENT_PID" 2>/dev/null; then
|
|
99
|
+
log "Agent running successfully"
|
|
100
|
+
exit 0
|
|
101
|
+
else
|
|
102
|
+
log "ERROR: Agent exited unexpectedly. Check $LOG_FILE for details"
|
|
103
|
+
tail -20 "$LOG_FILE" | while read line; do log " $line"; done
|
|
104
|
+
exit 1
|
|
105
|
+
fi
|
|
package/scripts-desktop/start-desktop.sh
CHANGED
|
@@ -60,6 +60,23 @@ if [ -z "$DBUS_SESSION_BUS_ADDRESS" ]; then
|
|
|
60
60
|
export DBUS_SESSION_BUS_ADDRESS
|
|
61
61
|
fi
|
|
62
62
|
|
|
63
|
+
# ─── Pre-configure xfwm4 to disable compositor ───────────────────────────────
|
|
64
|
+
# Writing the config file BEFORE starting XFCE ensures xfwm4 starts with
|
|
65
|
+
# compositing disabled from frame zero. The previous approach ran xfconf-query
|
|
66
|
+
# 3 seconds after startxfce4, but xfwm4 often started with compositing enabled
|
|
67
|
+
# before the query ran (or dbus wasn't ready) — causing the Xvfb framebuffer to
|
|
68
|
+
# stay permanently black (~1/15 runs). Pre-writing the XML avoids the race.
|
|
69
|
+
mkdir -p "${HOME}/.config/xfce4/xfconf/xfce-perchannel-xml"
|
|
70
|
+
cat > "${HOME}/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml" << 'EOF'
|
|
71
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
72
|
+
<channel name="xfwm4" version="1.0">
|
|
73
|
+
<property name="general" type="empty">
|
|
74
|
+
<property name="use_compositing" type="bool" value="false"/>
|
|
75
|
+
</property>
|
|
76
|
+
</channel>
|
|
77
|
+
EOF
|
|
78
|
+
echo "[start-desktop] xfwm4 compositor pre-disabled via config file"
|
|
79
|
+
|
|
63
80
|
# ─── Start XFCE desktop ──────────────────────────────────────────────────────
|
|
64
81
|
if pgrep -x xfce4-session > /dev/null 2>&1; then
|
|
65
82
|
echo "[start-desktop] XFCE already running, skipping"
|
|
@@ -68,9 +85,6 @@ else
|
|
|
68
85
|
startxfce4 &
|
|
69
86
|
sleep 3
|
|
70
87
|
|
|
71
|
-
# Disable xfwm4 compositor (causes black screen in Xvfb — no GPU)
|
|
72
|
-
xfconf-query -c xfwm4 -p /general/use_compositing -s false 2>/dev/null || true
|
|
73
|
-
|
|
74
88
|
# Kill power manager, screensaver, and error dialogs (not needed in headless)
|
|
75
89
|
killall xfce4-power-manager 2>/dev/null || true
|
|
76
90
|
killall xfce4-screensaver 2>/dev/null || true
|
|
@@ -78,6 +92,12 @@ else
|
|
|
78
92
|
xdotool search --name "Power Manager" windowclose 2>/dev/null || true
|
|
79
93
|
fi
|
|
80
94
|
|
|
95
|
+
# Always enforce compositor=off at runtime regardless of whether XFCE was already
|
|
96
|
+
# running. Belt-and-suspenders: covers the case where this script's previous run
|
|
97
|
+
# started XFCE (skipping the else-block above), or where xfwm4 somehow ignored
|
|
98
|
+
# the config file.
|
|
99
|
+
xfconf-query -c xfwm4 -p /general/use_compositing -s false 2>/dev/null || true
|
|
100
|
+
|
|
81
101
|
# ─── Set TestDriver wallpaper ─────────────────────────────────────────────────
|
|
82
102
|
WALLPAPER="/usr/share/backgrounds/xfce/wallpaper.png"
|
|
83
103
|
if [ -f "$WALLPAPER" ]; then
|
|
@@ -155,7 +175,130 @@ sleep 1
|
|
|
155
175
|
|
|
156
176
|
echo "[start-desktop] Desktop environment ready"
|
|
157
177
|
|
|
178
|
+
# ─── Helper: restart Xvfb ────────────────────────────────────────────────────
|
|
179
|
+
restart_xvfb() {
|
|
180
|
+
echo "[watchdog] Restarting Xvfb..."
|
|
181
|
+
killall Xvfb 2>/dev/null || true
|
|
182
|
+
sleep 1
|
|
183
|
+
rm -f /tmp/.X0-lock /tmp/.X11-unix/X0 2>/dev/null
|
|
184
|
+
Xvfb :0 -ac -screen 0 "${SCREEN_WIDTH}x${SCREEN_HEIGHT}x24" -retro -nolisten tcp &
|
|
185
|
+
XVFB_PID=$!
|
|
186
|
+
# Wait for Xvfb to be ready
|
|
187
|
+
for _w in $(seq 1 10); do
|
|
188
|
+
xdpyinfo -display :0 > /dev/null 2>&1 && break
|
|
189
|
+
sleep 1
|
|
190
|
+
done
|
|
191
|
+
if ! kill -0 $XVFB_PID 2>/dev/null; then
|
|
192
|
+
echo "[watchdog] ERROR: Xvfb failed to restart"
|
|
193
|
+
return 1
|
|
194
|
+
fi
|
|
195
|
+
# Re-disable screen blanking & DPMS on the fresh Xvfb
|
|
196
|
+
xset s off 2>/dev/null || true
|
|
197
|
+
xset s noblank 2>/dev/null || true
|
|
198
|
+
xset -dpms 2>/dev/null || true
|
|
199
|
+
echo "[watchdog] Xvfb restarted (PID: $XVFB_PID)"
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
# ─── Helper: restart xfce4 (mirrors E2B's defunct-process check) ─────────────
|
|
203
|
+
restart_xfce4() {
|
|
204
|
+
echo "[watchdog] Restarting xfce4-session..."
|
|
205
|
+
killall xfce4-session 2>/dev/null || true
|
|
206
|
+
sleep 1
|
|
207
|
+
startxfce4 &
|
|
208
|
+
XFCE4_PID=$!
|
|
209
|
+
sleep 3
|
|
210
|
+
killall xfce4-power-manager 2>/dev/null || true
|
|
211
|
+
killall xfce4-screensaver 2>/dev/null || true
|
|
212
|
+
xfconf-query -c xfwm4 -p /general/use_compositing -s false 2>/dev/null || true
|
|
213
|
+
xset s off 2>/dev/null || true
|
|
214
|
+
xset s noblank 2>/dev/null || true
|
|
215
|
+
xset -dpms 2>/dev/null || true
|
|
216
|
+
echo "[watchdog] xfce4-session restarted (PID: $XFCE4_PID)"
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
# ─── Helper: restart x11vnc ──────────────────────────────────────────────────
|
|
220
|
+
restart_x11vnc() {
|
|
221
|
+
echo "[watchdog] Restarting x11vnc..."
|
|
222
|
+
killall x11vnc 2>/dev/null || true
|
|
223
|
+
sleep 1
|
|
224
|
+
x11vnc -display :0 -forever -nopw -shared -rfbport 5900 \
|
|
225
|
+
-noxdamage -fixscreen V=2 \
|
|
226
|
+
-bg -o /dev/null 2>/dev/null || true
|
|
227
|
+
echo "[watchdog] x11vnc restarted"
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
# ─── Watchdog loop ────────────────────────────────────────────────────────────
|
|
231
|
+
# Monitors Xvfb, xfce4-session, and x11vnc health every 10 seconds.
|
|
232
|
+
# Restarts any component that has crashed or become defunct.
|
|
233
|
+
# Also periodically re-disables screen blanking/compositor as belt-and-suspenders.
|
|
234
|
+
WATCHDOG_INTERVAL=10
|
|
235
|
+
BLANKING_RESET_COUNTER=0
|
|
236
|
+
|
|
237
|
+
watchdog_loop() {
|
|
238
|
+
while true; do
|
|
239
|
+
sleep "$WATCHDOG_INTERVAL"
|
|
240
|
+
|
|
241
|
+
# ── Check Xvfb ──
|
|
242
|
+
if ! pgrep -x Xvfb > /dev/null 2>&1; then
|
|
243
|
+
echo "[watchdog] Xvfb not running! Recovering..."
|
|
244
|
+
restart_xvfb
|
|
245
|
+
# x11vnc and xfce need a running Xvfb, restart them too
|
|
246
|
+
restart_xfce4
|
|
247
|
+
restart_x11vnc
|
|
248
|
+
continue
|
|
249
|
+
fi
|
|
250
|
+
|
|
251
|
+
# Verify Xvfb is actually responding (not just a zombie process)
|
|
252
|
+
if ! xdpyinfo -display :0 > /dev/null 2>&1; then
|
|
253
|
+
echo "[watchdog] Xvfb process exists but display :0 is unresponsive! Recovering..."
|
|
254
|
+
restart_xvfb
|
|
255
|
+
restart_xfce4
|
|
256
|
+
restart_x11vnc
|
|
257
|
+
continue
|
|
258
|
+
fi
|
|
259
|
+
|
|
260
|
+
# ── Check xfce4-session (E2B pattern: detect <defunct> zombie) ──
|
|
261
|
+
XFCE_PID=$(pgrep -x xfce4-session | head -1)
|
|
262
|
+
if [ -z "$XFCE_PID" ]; then
|
|
263
|
+
echo "[watchdog] xfce4-session not running! Restarting..."
|
|
264
|
+
restart_xfce4
|
|
265
|
+
elif ps aux | grep "$XFCE_PID" | grep -v grep | head -1 | grep -q '<defunct>'; then
|
|
266
|
+
echo "[watchdog] xfce4-session is defunct (zombie)! Restarting..."
|
|
267
|
+
restart_xfce4
|
|
268
|
+
fi
|
|
269
|
+
|
|
270
|
+
# ── Check x11vnc ──
|
|
271
|
+
if ! pgrep -x x11vnc > /dev/null 2>&1; then
|
|
272
|
+
echo "[watchdog] x11vnc not running! Restarting..."
|
|
273
|
+
restart_x11vnc
|
|
274
|
+
fi
|
|
275
|
+
|
|
276
|
+
# ── Periodically re-disable screen blanking & compositor (every ~60s) ──
|
|
277
|
+
BLANKING_RESET_COUNTER=$((BLANKING_RESET_COUNTER + 1))
|
|
278
|
+
if [ "$BLANKING_RESET_COUNTER" -ge 6 ]; then
|
|
279
|
+
BLANKING_RESET_COUNTER=0
|
|
280
|
+
xset s off 2>/dev/null || true
|
|
281
|
+
xset s noblank 2>/dev/null || true
|
|
282
|
+
xset -dpms 2>/dev/null || true
|
|
283
|
+
xfconf-query -c xfwm4 -p /general/use_compositing -s false 2>/dev/null || true
|
|
284
|
+
fi
|
|
285
|
+
|
|
286
|
+
# ── Monitor /dev/shm usage ──
|
|
287
|
+
if [ -d /dev/shm ]; then
|
|
288
|
+
SHM_USAGE=$(df /dev/shm 2>/dev/null | awk 'NR==2 {print $5}' | tr -d '%')
|
|
289
|
+
if [ -n "$SHM_USAGE" ] && [ "$SHM_USAGE" -gt 90 ] 2>/dev/null; then
|
|
290
|
+
echo "[watchdog] WARNING: /dev/shm is ${SHM_USAGE}% full — X11 may fail to allocate pixmaps"
|
|
291
|
+
fi
|
|
292
|
+
fi
|
|
293
|
+
done
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
# Start watchdog in background
|
|
297
|
+
watchdog_loop &
|
|
298
|
+
WATCHDOG_PID=$!
|
|
299
|
+
echo "[start-desktop] Watchdog started (PID: $WATCHDOG_PID)"
|
|
300
|
+
|
|
158
301
|
# Keep the script running so E2B doesn't consider the sandbox stopped
|
|
159
302
|
# Trap signals for clean shutdown
|
|
160
|
-
trap "kill $XVFB_PID $NOVNC_PID 2>/dev/null; exit 0" SIGTERM SIGINT
|
|
303
|
+
trap "kill $XVFB_PID $NOVNC_PID $WATCHDOG_PID 2>/dev/null; exit 0" SIGTERM SIGINT
|
|
161
304
|
wait
|