autokap 1.3.15 → 1.3.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.d.ts +14 -0
- package/dist/browser.js +57 -2
- package/dist/clip-capture-loop.js +7 -1
- package/dist/ffmpeg-x11-recorder.d.ts +52 -0
- package/dist/ffmpeg-x11-recorder.js +193 -0
- package/dist/web-playwright-local.js +72 -17
- package/dist/xvfb-process.d.ts +30 -0
- package/dist/xvfb-process.js +103 -0
- package/package.json +1 -1
package/dist/browser.d.ts
CHANGED
|
@@ -99,6 +99,20 @@ export declare class Browser {
|
|
|
99
99
|
private elementMap;
|
|
100
100
|
private akNodeIndex;
|
|
101
101
|
private poolContext;
|
|
102
|
+
/**
|
|
103
|
+
* Xvfb instance backing the headed Chromium used by clip capture on Cloud
|
|
104
|
+
* Run with NVIDIA L4. Set when forClipCapture spawns Xvfb; null otherwise
|
|
105
|
+
* (Mac/Win and local Linux use headless Playwright). Lifetime matches the
|
|
106
|
+
* browser process — closeContext() leaves it alone, close() tears it down.
|
|
107
|
+
*/
|
|
108
|
+
private xvfb;
|
|
109
|
+
/** Public read access for the clip recorder (needs the DISPLAY string). */
|
|
110
|
+
get xvfbDisplay(): string | null;
|
|
111
|
+
/** Viewport size used for clip capture region (matches Xvfb screen). */
|
|
112
|
+
get viewport(): {
|
|
113
|
+
width: number;
|
|
114
|
+
height: number;
|
|
115
|
+
};
|
|
102
116
|
constructor(options: BrowserOptions);
|
|
103
117
|
/**
|
|
104
118
|
* Create a Browser using the shared pool (server/web API mode).
|
package/dist/browser.js
CHANGED
|
@@ -99,6 +99,7 @@ function resolveEffectivePadding(config, bbox) {
|
|
|
99
99
|
import { CAPTURE_HIDE_STYLE_ID, dismissCookiesAndWidgets, ensureCaptureHideStyles, getCaptureHideCSS, } from './cookie-dismiss.js';
|
|
100
100
|
import { CHROMIUM_ARGS, browserPool } from './browser-pool.js';
|
|
101
101
|
import { isDebugEnabled, logger } from './logger.js';
|
|
102
|
+
import { XvfbProcess } from './xvfb-process.js';
|
|
102
103
|
async function withHelperTimeout(label, timeoutMs, work) {
|
|
103
104
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
104
105
|
return work();
|
|
@@ -775,6 +776,17 @@ export class Browser {
|
|
|
775
776
|
elementMap = new Map();
|
|
776
777
|
akNodeIndex = new Map();
|
|
777
778
|
poolContext = false;
|
|
779
|
+
/**
|
|
780
|
+
* Xvfb instance backing the headed Chromium used by clip capture on Cloud
|
|
781
|
+
* Run with NVIDIA L4. Set when forClipCapture spawns Xvfb; null otherwise
|
|
782
|
+
* (Mac/Win and local Linux use headless Playwright). Lifetime matches the
|
|
783
|
+
* browser process — closeContext() leaves it alone, close() tears it down.
|
|
784
|
+
*/
|
|
785
|
+
xvfb = null;
|
|
786
|
+
/** Public read access for the clip recorder (needs the DISPLAY string). */
|
|
787
|
+
get xvfbDisplay() { return this.xvfb?.display ?? null; }
|
|
788
|
+
/** Viewport size used for clip capture region (matches Xvfb screen). */
|
|
789
|
+
get viewport() { return this.options.viewport; }
|
|
778
790
|
constructor(options) {
|
|
779
791
|
this.options = options;
|
|
780
792
|
}
|
|
@@ -850,17 +862,49 @@ export class Browser {
|
|
|
850
862
|
'--enable-zero-copy',
|
|
851
863
|
]
|
|
852
864
|
: [];
|
|
865
|
+
// Cloud Run Linux + GPU: spawn Xvfb so headed Chromium has a display to
|
|
866
|
+
// render into, and ffmpeg x11grab can capture that framebuffer directly
|
|
867
|
+
// (the FfmpegX11Recorder + h264_nvenc encoding path used by clip recording).
|
|
868
|
+
// The CDP Page.captureScreenshot path on cloud topped out at 9 fps even
|
|
869
|
+
// with the GPU compositor active because libjpeg-turbo CPU encode + CDP
|
|
870
|
+
// transport overhead pinned per-frame cost at ~85 ms. Capturing the X
|
|
871
|
+
// framebuffer with NVENC bypasses both costs.
|
|
872
|
+
const xvfbWidth = Math.round(options.viewport.width);
|
|
873
|
+
const xvfbHeight = Math.round(options.viewport.height);
|
|
874
|
+
if (isLinuxWithGpu) {
|
|
875
|
+
instance.xvfb = new XvfbProcess({
|
|
876
|
+
displayNumber: 99,
|
|
877
|
+
width: xvfbWidth,
|
|
878
|
+
height: xvfbHeight,
|
|
879
|
+
});
|
|
880
|
+
await instance.xvfb.start();
|
|
881
|
+
// Chromium picks up DISPLAY from the parent process env. ffmpeg will
|
|
882
|
+
// read the same display via FfmpegX11RecorderOptions.
|
|
883
|
+
process.env.DISPLAY = instance.xvfb.display;
|
|
884
|
+
logger.info(`[capture] Cloud clip capture: Chromium → Xvfb ${instance.xvfb.display} → ffmpeg x11grab + h264_nvenc path enabled`);
|
|
885
|
+
}
|
|
886
|
+
// Kiosk + zero-position anchor for Xvfb: Chromium normally renders its
|
|
887
|
+
// own toolbar/tabbar in headed mode, which would appear at the top of
|
|
888
|
+
// every clip. `--kiosk` removes all UI; `--window-position=0,0` and
|
|
889
|
+
// `--window-size` make the page fill the Xvfb screen exactly.
|
|
890
|
+
const xvfbWindowArgs = isLinuxWithGpu ? [
|
|
891
|
+
'--kiosk',
|
|
892
|
+
'--window-position=0,0',
|
|
893
|
+
] : [];
|
|
853
894
|
const clipArgs = [
|
|
854
895
|
...baseArgs,
|
|
855
896
|
`--force-device-scale-factor=${deviceScaleFactor}`,
|
|
856
|
-
`--window-size=${
|
|
897
|
+
`--window-size=${xvfbWidth},${xvfbHeight}`,
|
|
857
898
|
...(angleArg ? [angleArg] : []),
|
|
858
899
|
...cloudGpuArgs,
|
|
900
|
+
...xvfbWindowArgs,
|
|
859
901
|
];
|
|
860
902
|
// Dedicated browser process for clip capture. Not pooled because clip
|
|
861
903
|
// capture installs context-level init scripts (cursor overlay).
|
|
904
|
+
// Cloud Run with Xvfb: launch headed (headless: false) so Chromium
|
|
905
|
+
// renders to the Xvfb framebuffer that ffmpeg captures.
|
|
862
906
|
instance.browser = await chromium.launch({
|
|
863
|
-
headless: !options.headed,
|
|
907
|
+
headless: isLinuxWithGpu ? false : !options.headed,
|
|
864
908
|
args: clipArgs,
|
|
865
909
|
});
|
|
866
910
|
const contextOptions = {
|
|
@@ -1096,6 +1140,17 @@ export class Browser {
|
|
|
1096
1140
|
this.context = null;
|
|
1097
1141
|
this.page = null;
|
|
1098
1142
|
}
|
|
1143
|
+
// Tear down Xvfb only after Chromium is fully gone — Chromium needs the
|
|
1144
|
+
// X display for its own teardown (releasing GL contexts, X resources).
|
|
1145
|
+
if (this.xvfb) {
|
|
1146
|
+
try {
|
|
1147
|
+
await this.xvfb.stop();
|
|
1148
|
+
}
|
|
1149
|
+
catch (err) {
|
|
1150
|
+
logger.warn(`[xvfb] stop failed: ${err.message}`);
|
|
1151
|
+
}
|
|
1152
|
+
this.xvfb = null;
|
|
1153
|
+
}
|
|
1099
1154
|
}
|
|
1100
1155
|
async navigateTo(url) {
|
|
1101
1156
|
const page = this.ensurePage();
|
|
@@ -45,7 +45,13 @@ export class ClipCaptureLoop {
|
|
|
45
45
|
const targetFps = Math.max(1, Math.min(30, opts.targetFps ?? platformDefault));
|
|
46
46
|
this.targetFps = targetFps;
|
|
47
47
|
this.targetFrameIntervalMs = 1000 / targetFps;
|
|
48
|
-
|
|
48
|
+
// minRestMs adds a forced sleep after each frame, intended to yield the
|
|
49
|
+
// event loop on slow runners. With Cloud Run + L4 GPU, CDP capture itself
|
|
50
|
+
// takes ~85ms, so ANY non-zero minRestMs caps observed FPS below the
|
|
51
|
+
// already-low CDP ceiling (16ms forced rest = 9.9 fps cap, matched the
|
|
52
|
+
// 1.3.15 plateau exactly). Drop to 0 on cloud — the await on the next
|
|
53
|
+
// CDP send already yields the loop.
|
|
54
|
+
const linuxMinRest = isCloudRunner ? 0 : 50;
|
|
49
55
|
const platformMinRest = process.platform === 'linux' ? linuxMinRest : 16;
|
|
50
56
|
this.minRestMs = Math.max(0, Math.min(250, opts.minRestMs ?? platformMinRest));
|
|
51
57
|
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ffmpeg x11grab recorder with NVIDIA NVENC hardware encoder.
|
|
3
|
+
*
|
|
4
|
+
* Captures the Xvfb virtual display directly with `ffmpeg -f x11grab` at a
|
|
5
|
+
* fixed framerate, encoded straight to MP4 via `h264_nvenc` running on the
|
|
6
|
+
* Cloud Run NVIDIA L4 GPU. Decouples clip recording from Chromium's
|
|
7
|
+
* compositor speed AND from the CPU JPEG encoder that capped the previous
|
|
8
|
+
* CDP `Page.captureScreenshot` path at ~9 fps.
|
|
9
|
+
*
|
|
10
|
+
* Why this works on Cloud Run when v1.3.10 (libx264) didn't:
|
|
11
|
+
* - v1.3.10 used CPU encoder (libx264) which competed with Chromium for
|
|
12
|
+
* vCPU on the same machine, giving 1.83 fps.
|
|
13
|
+
* - h264_nvenc offloads the entire encode to dedicated NVIDIA NVENC silicon
|
|
14
|
+
* on the L4 (separate from CUDA cores). Encode is essentially free CPU-side.
|
|
15
|
+
* - The L4 has 2× NVENC blocks and can sustain multiple 4K streams in
|
|
16
|
+
* parallel, so 1440x900@30fps is trivial.
|
|
17
|
+
*
|
|
18
|
+
* Lifecycle: one recorder instance per BEGIN_CLIP/END_CLIP. Xvfb itself
|
|
19
|
+
* runs for the whole browser process lifetime.
|
|
20
|
+
*
|
|
21
|
+
* Refs:
|
|
22
|
+
* - https://docs.cloud.google.com/run/docs/tutorials/video-encoding
|
|
23
|
+
* - https://docs.nvidia.com/video-technologies/video-codec-sdk/13.0/ffmpeg-with-nvidia-gpu/index.html
|
|
24
|
+
*/
|
|
25
|
+
/** Settings for one ffmpeg x11grab recording session. */
export interface FfmpegX11RecorderOptions {
    /** X display to grab from, written the way `DISPLAY` is (for example `:99`). */
    display: string;
    /** Width of the grabbed region, in pixels; expected to equal the Xvfb screen width. */
    width: number;
    /** Height of the grabbed region, in pixels; expected to equal the Xvfb screen height. */
    height: number;
    /** Frames per second requested from x11grab. */
    fps: number;
    /** Absolute path of the `.mp4` file ffmpeg writes. */
    outputPath: string;
}
|
|
37
|
+
/** Summary returned by `FfmpegX11Recorder.stop()` once the file is finalized. */
export interface FfmpegX11RecorderResult {
    /** Path of the recorded `.mp4` (same value as the `outputPath` option). */
    outputPath: string;
    /**
     * Milliseconds between launching ffmpeg and its first reported frame;
     * 0 when no frame was ever observed.
     */
    trimStartMs: number;
    /** Wall-clock milliseconds from launching ffmpeg until it was stopped. */
    durationMs: number;
}
|
|
42
|
+
/**
 * Drives a single ffmpeg `x11grab` → `h264_nvenc` recording of an X display.
 * Create one instance per clip: `start()` begins capture, `stop()` finalizes
 * the MP4 and reports timing.
 */
export declare class FfmpegX11Recorder {
    /** Options supplied at construction. */
    private readonly opts;
    /** ffmpeg child process while a recording is active. */
    private process;
    /** Timestamp taken just before ffmpeg is spawned. */
    private startedAt;
    /** Timestamp of the first non-zero `frame=` progress line; 0 until seen. */
    private firstFrameAt;
    /** Most recent ffmpeg progress line, kept for the final summary log. */
    private lastReportedFrameLine;
    /** Bounded tail of ffmpeg's stderr, attached to error messages. */
    private stderrTail;
    constructor(opts: FfmpegX11RecorderOptions);
    /**
     * Spawns ffmpeg and resolves once it reports its first frame. Rejects if
     * ffmpeg exits first or no frame is reported before the timeout.
     */
    start(): Promise<void>;
    /**
     * Asks ffmpeg to finish, waits for it to exit, and returns the result.
     * Rejects if no recording is active or the output file is missing/empty.
     */
    stop(): Promise<FfmpegX11RecorderResult>;
}
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ffmpeg x11grab recorder with NVIDIA NVENC hardware encoder.
|
|
3
|
+
*
|
|
4
|
+
* Captures the Xvfb virtual display directly with `ffmpeg -f x11grab` at a
|
|
5
|
+
* fixed framerate, encoded straight to MP4 via `h264_nvenc` running on the
|
|
6
|
+
* Cloud Run NVIDIA L4 GPU. Decouples clip recording from Chromium's
|
|
7
|
+
* compositor speed AND from the CPU JPEG encoder that capped the previous
|
|
8
|
+
* CDP `Page.captureScreenshot` path at ~9 fps.
|
|
9
|
+
*
|
|
10
|
+
* Why this works on Cloud Run when v1.3.10 (libx264) didn't:
|
|
11
|
+
* - v1.3.10 used CPU encoder (libx264) which competed with Chromium for
|
|
12
|
+
* vCPU on the same machine, giving 1.83 fps.
|
|
13
|
+
* - h264_nvenc offloads the entire encode to dedicated NVIDIA NVENC silicon
|
|
14
|
+
* on the L4 (separate from CUDA cores). Encode is essentially free CPU-side.
|
|
15
|
+
* - The L4 has 2× NVENC blocks and can sustain multiple 4K streams in
|
|
16
|
+
* parallel, so 1440x900@30fps is trivial.
|
|
17
|
+
*
|
|
18
|
+
* Lifecycle: one recorder instance per BEGIN_CLIP/END_CLIP. Xvfb itself
|
|
19
|
+
* runs for the whole browser process lifetime.
|
|
20
|
+
*
|
|
21
|
+
* Refs:
|
|
22
|
+
* - https://docs.cloud.google.com/run/docs/tutorials/video-encoding
|
|
23
|
+
* - https://docs.nvidia.com/video-technologies/video-codec-sdk/13.0/ffmpeg-with-nvidia-gpu/index.html
|
|
24
|
+
*/
|
|
25
|
+
import { spawn } from 'node:child_process';
|
|
26
|
+
import fs from 'node:fs/promises';
|
|
27
|
+
import { logger } from './logger.js';
|
|
28
|
+
const FFMPEG_FIRST_FRAME_TIMEOUT_MS = 5_000;
|
|
29
|
+
const FFMPEG_FIRST_FRAME_POLL_MS = 50;
|
|
30
|
+
const FFMPEG_GRACEFUL_STOP_MS = 3_000;
|
|
31
|
+
const FFMPEG_FORCE_STOP_MS = 2_000;
|
|
32
|
+
export class FfmpegX11Recorder {
|
|
33
|
+
opts;
|
|
34
|
+
process = null;
|
|
35
|
+
startedAt = 0;
|
|
36
|
+
firstFrameAt = 0;
|
|
37
|
+
lastReportedFrameLine = null;
|
|
38
|
+
stderrTail = [];
|
|
39
|
+
constructor(opts) {
|
|
40
|
+
this.opts = opts;
|
|
41
|
+
}
|
|
42
|
+
async start() {
|
|
43
|
+
if (this.process)
|
|
44
|
+
throw new Error('ffmpeg x11grab already running');
|
|
45
|
+
const { display, width, height, fps, outputPath } = this.opts;
|
|
46
|
+
// -draw_mouse 0: hide the X cursor — the cursor overlay script paints a
|
|
47
|
+
// fake cursor in the DOM that's already captured via the page.
|
|
48
|
+
// -c:v h264_nvenc: NVIDIA hardware encoder. Eliminates CPU contention
|
|
49
|
+
// with Chromium that pinned v1.3.10 (libx264) at 1.83 fps.
|
|
50
|
+
// -preset p4 + -tune ll: NVENC preset 4 (balanced) with low-latency
|
|
51
|
+
// tuning. p1 is fastest but lower quality, p7 is highest quality but
|
|
52
|
+
// slower. p4 is the standard real-time recording preset per NVIDIA's
|
|
53
|
+
// FFmpeg+GPU guide. -tune ll skips B-frames and sets LL-friendly
|
|
54
|
+
// reference patterns so each frame can be encoded independently.
|
|
55
|
+
// -rc cbr -b:v 5M: constant bitrate 5 Mbps — produces smooth quality
|
|
56
|
+
// for screen content at 1440x900. CRF/CQ modes are CPU-side only;
|
|
57
|
+
// NVENC supports CBR/VBR/cqp.
|
|
58
|
+
// -pix_fmt yuv420p + +faststart: maximum playback compatibility (Safari,
|
|
59
|
+
// QuickTime, browser <video>). NVENC ingests RGBA from x11grab and
|
|
60
|
+
// converts internally.
|
|
61
|
+
const args = [
|
|
62
|
+
'-y',
|
|
63
|
+
'-loglevel', 'warning',
|
|
64
|
+
'-stats',
|
|
65
|
+
'-f', 'x11grab',
|
|
66
|
+
'-draw_mouse', '0',
|
|
67
|
+
'-framerate', String(fps),
|
|
68
|
+
'-video_size', `${width}x${height}`,
|
|
69
|
+
'-i', `${display}.0+0,0`,
|
|
70
|
+
'-c:v', 'h264_nvenc',
|
|
71
|
+
'-preset', 'p4',
|
|
72
|
+
'-tune', 'll',
|
|
73
|
+
'-rc', 'cbr',
|
|
74
|
+
'-b:v', '5M',
|
|
75
|
+
'-pix_fmt', 'yuv420p',
|
|
76
|
+
'-movflags', '+faststart',
|
|
77
|
+
outputPath,
|
|
78
|
+
];
|
|
79
|
+
// The cloud-runner image ships ONE ffmpeg binary at /usr/local/bin built
|
|
80
|
+
// from BtbN/FFmpeg-Builds with both x11grab AND h264_nvenc enabled.
|
|
81
|
+
// AUTOKAP_FFMPEG_X11_BIN can override for local testing or alternate
|
|
82
|
+
// builds. Falls back to PATH ffmpeg.
|
|
83
|
+
const ffmpegBin = process.env.AUTOKAP_FFMPEG_X11_BIN || 'ffmpeg';
|
|
84
|
+
logger.info(`[ffmpeg-x11] starting capture on ${display} → ${outputPath} (${width}×${height} @ ${fps}fps, encoder=h264_nvenc, bin=${ffmpegBin})`);
|
|
85
|
+
this.startedAt = performance.now();
|
|
86
|
+
// stdin is `pipe` so we can send 'q' for graceful shutdown (writes the
|
|
87
|
+
// moov atom; SIGTERM produces an unplayable file).
|
|
88
|
+
this.process = spawn(ffmpegBin, args, { stdio: ['pipe', 'pipe', 'pipe'] });
|
|
89
|
+
let exited = false;
|
|
90
|
+
let exitError = null;
|
|
91
|
+
this.process.stderr?.on('data', (chunk) => {
|
|
92
|
+
const text = String(chunk);
|
|
93
|
+
this.stderrTail.push(text);
|
|
94
|
+
// Cap retained stderr at ~10 KB to avoid unbounded memory growth on
|
|
95
|
+
// long recordings.
|
|
96
|
+
while (this.stderrTail.join('').length > 10_000) {
|
|
97
|
+
this.stderrTail.shift();
|
|
98
|
+
}
|
|
99
|
+
// ffmpeg's progress lines look like: `frame= 42 fps=30 q=23 size= ...`
|
|
100
|
+
// First non-zero `frame=` value signals capture is actually streaming.
|
|
101
|
+
if (this.firstFrameAt === 0 && /frame=\s*[1-9]/.test(text)) {
|
|
102
|
+
this.firstFrameAt = performance.now();
|
|
103
|
+
}
|
|
104
|
+
// Track the latest progress line for the final summary log.
|
|
105
|
+
const match = text.match(/frame=\s*\d+\s+fps=[\d.]+\s+[^\n]+/);
|
|
106
|
+
if (match)
|
|
107
|
+
this.lastReportedFrameLine = match[0].trim();
|
|
108
|
+
});
|
|
109
|
+
this.process.on('exit', (code, signal) => {
|
|
110
|
+
exited = true;
|
|
111
|
+
const wasGracefulStop = signal === 'SIGTERM' || signal === 'SIGINT' || code === 0;
|
|
112
|
+
if (!wasGracefulStop && code !== null) {
|
|
113
|
+
exitError = new Error(`ffmpeg exited unexpectedly: code=${code} signal=${signal}\n` +
|
|
114
|
+
`Last stderr:\n${this.stderrTail.join('').slice(-2_000)}`);
|
|
115
|
+
}
|
|
116
|
+
});
|
|
117
|
+
this.process.on('error', (err) => {
|
|
118
|
+
exitError = new Error(`ffmpeg spawn error: ${err.message}`);
|
|
119
|
+
});
|
|
120
|
+
// Wait for the first frame to confirm x11grab connected to Xvfb and
|
|
121
|
+
// encoding has begun. If ffmpeg dies before this, propagate the error.
|
|
122
|
+
const waitStartedAt = Date.now();
|
|
123
|
+
while (Date.now() - waitStartedAt < FFMPEG_FIRST_FRAME_TIMEOUT_MS) {
|
|
124
|
+
if (exited) {
|
|
125
|
+
throw exitError ?? new Error(`ffmpeg exited before first frame:\n${this.stderrTail.join('').slice(-2_000)}`);
|
|
126
|
+
}
|
|
127
|
+
if (this.firstFrameAt > 0) {
|
|
128
|
+
logger.info(`[ffmpeg-x11] capturing — first frame after ${Math.round(this.firstFrameAt - this.startedAt)}ms`);
|
|
129
|
+
return;
|
|
130
|
+
}
|
|
131
|
+
await new Promise(r => setTimeout(r, FFMPEG_FIRST_FRAME_POLL_MS));
|
|
132
|
+
}
|
|
133
|
+
throw new Error(`ffmpeg did not produce first frame within ${FFMPEG_FIRST_FRAME_TIMEOUT_MS}ms\n` +
|
|
134
|
+
`Last stderr:\n${this.stderrTail.join('').slice(-2_000)}`);
|
|
135
|
+
}
|
|
136
|
+
async stop() {
|
|
137
|
+
if (!this.process)
|
|
138
|
+
throw new Error('ffmpeg x11grab not running');
|
|
139
|
+
const proc = this.process;
|
|
140
|
+
this.process = null;
|
|
141
|
+
// 'q' → ffmpeg writes the moov atom and exits cleanly. SIGTERM/SIGKILL
|
|
142
|
+
// would corrupt the MP4 (no moov, unplayable in browsers).
|
|
143
|
+
try {
|
|
144
|
+
proc.stdin?.write('q');
|
|
145
|
+
proc.stdin?.end();
|
|
146
|
+
}
|
|
147
|
+
catch { /* stdin may already be closed */ }
|
|
148
|
+
await new Promise(resolve => {
|
|
149
|
+
const sigtermTimer = setTimeout(() => {
|
|
150
|
+
logger.warn(`[ffmpeg-x11] did not exit within ${FFMPEG_GRACEFUL_STOP_MS}ms — sending SIGTERM`);
|
|
151
|
+
try {
|
|
152
|
+
proc.kill('SIGTERM');
|
|
153
|
+
}
|
|
154
|
+
catch { /* already dead */ }
|
|
155
|
+
const sigkillTimer = setTimeout(() => {
|
|
156
|
+
try {
|
|
157
|
+
proc.kill('SIGKILL');
|
|
158
|
+
}
|
|
159
|
+
catch { /* already dead */ }
|
|
160
|
+
resolve();
|
|
161
|
+
}, FFMPEG_FORCE_STOP_MS);
|
|
162
|
+
proc.on('exit', () => { clearTimeout(sigkillTimer); resolve(); });
|
|
163
|
+
}, FFMPEG_GRACEFUL_STOP_MS);
|
|
164
|
+
proc.on('exit', () => { clearTimeout(sigtermTimer); resolve(); });
|
|
165
|
+
});
|
|
166
|
+
const stoppedAt = performance.now();
|
|
167
|
+
const trimStartMs = this.firstFrameAt > 0
|
|
168
|
+
? Math.max(0, this.firstFrameAt - this.startedAt)
|
|
169
|
+
: 0;
|
|
170
|
+
const durationMs = stoppedAt - this.startedAt;
|
|
171
|
+
let fileSize = 0;
|
|
172
|
+
try {
|
|
173
|
+
const stat = await fs.stat(this.opts.outputPath);
|
|
174
|
+
fileSize = stat.size;
|
|
175
|
+
if (fileSize === 0) {
|
|
176
|
+
throw new Error(`ffmpeg produced 0-byte file at ${this.opts.outputPath}`);
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
catch (err) {
|
|
180
|
+
throw new Error(`ffmpeg output unreadable: ${err.message}\n` +
|
|
181
|
+
`Last stderr:\n${this.stderrTail.join('').slice(-2_000)}`);
|
|
182
|
+
}
|
|
183
|
+
logger.info(`[ffmpeg-x11] finalized: ${(fileSize / 1024).toFixed(1)} KB, ` +
|
|
184
|
+
`${(durationMs / 1000).toFixed(2)}s wall, trim ${Math.round(trimStartMs)}ms` +
|
|
185
|
+
(this.lastReportedFrameLine ? ` (${this.lastReportedFrameLine})` : ''));
|
|
186
|
+
return {
|
|
187
|
+
outputPath: this.opts.outputPath,
|
|
188
|
+
trimStartMs,
|
|
189
|
+
durationMs,
|
|
190
|
+
};
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
//# sourceMappingURL=ffmpeg-x11-recorder.js.map
|
|
@@ -11,6 +11,7 @@ import { humanType, moveMouse, } from './mouse-animation.js';
|
|
|
11
11
|
import { resolveTarget } from './semantic-resolver.js';
|
|
12
12
|
import { logger } from './logger.js';
|
|
13
13
|
import { ClipCaptureLoop } from './clip-capture-loop.js';
|
|
14
|
+
import { FfmpegX11Recorder } from './ffmpeg-x11-recorder.js';
|
|
14
15
|
import { assembleMp4FromFrames, getMediaDurationMs } from './clip-postprocess.js';
|
|
15
16
|
export class WebPlaywrightLocal {
|
|
16
17
|
browser;
|
|
@@ -352,23 +353,51 @@ export class WebPlaywrightLocal {
|
|
|
352
353
|
const cloudClipFps = isCloudRunner ? 30 : defaultFps;
|
|
353
354
|
const targetFps = options.captureFps
|
|
354
355
|
?? (options.mediaMode === 'video' ? 30 : cloudClipFps);
|
|
355
|
-
const
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
356
|
+
const mp4Path = path.join(baseDir, `${options.mediaMode}.mp4`);
|
|
357
|
+
// Cloud Run + NVIDIA L4: capture the Xvfb framebuffer with ffmpeg x11grab
|
|
358
|
+
// and encode via h264_nvenc on the GPU. Bypasses both the CDP transport
|
|
359
|
+
// overhead and the libjpeg-turbo CPU encode that capped CDP screenshot
|
|
360
|
+
// capture at 9 fps. The Browser instance spawns Xvfb in forClipCapture
|
|
361
|
+
// when AUTOKAP_CLOUD_RUNNER=1; xvfbDisplay is the gating signal.
|
|
362
|
+
const xvfbDisplay = this.browser.xvfbDisplay;
|
|
363
|
+
if (isCloudRunner && xvfbDisplay) {
|
|
364
|
+
const viewport = this.browser.viewport;
|
|
365
|
+
const ffmpegRecorder = new FfmpegX11Recorder({
|
|
366
|
+
display: xvfbDisplay,
|
|
367
|
+
width: Math.round(viewport.width),
|
|
368
|
+
height: Math.round(viewport.height),
|
|
369
|
+
fps: targetFps,
|
|
370
|
+
outputPath: mp4Path,
|
|
371
|
+
});
|
|
372
|
+
await ffmpegRecorder.start();
|
|
373
|
+
this.recording = {
|
|
374
|
+
mediaMode: options.mediaMode,
|
|
375
|
+
startedAt: Date.now(),
|
|
376
|
+
framesDir,
|
|
377
|
+
mp4Path,
|
|
378
|
+
ffmpegRecorder,
|
|
379
|
+
finalized: false,
|
|
380
|
+
};
|
|
381
|
+
}
|
|
382
|
+
else {
|
|
383
|
+
const loop = new ClipCaptureLoop({
|
|
384
|
+
page,
|
|
385
|
+
framesDir,
|
|
386
|
+
targetFps,
|
|
387
|
+
// Cloud runners have CPU headroom — drop the Linux 50 ms idle cushion
|
|
388
|
+
// (sized for tight CI runners) to let the loop stay close to its target.
|
|
389
|
+
minRestMs: process.platform === 'linux' && !isCloudRunner ? 50 : 16,
|
|
390
|
+
});
|
|
391
|
+
await loop.start();
|
|
392
|
+
this.recording = {
|
|
393
|
+
mediaMode: options.mediaMode,
|
|
394
|
+
startedAt: Date.now(),
|
|
395
|
+
framesDir,
|
|
396
|
+
mp4Path,
|
|
397
|
+
loop,
|
|
398
|
+
finalized: false,
|
|
399
|
+
};
|
|
400
|
+
}
|
|
372
401
|
this.clipCursor = {
|
|
373
402
|
currentPosition: null,
|
|
374
403
|
pace: options.mediaMode === 'video' ? 'natural' : 'fast',
|
|
@@ -461,6 +490,32 @@ export class WebPlaywrightLocal {
|
|
|
461
490
|
this.recordingNavWatcher.detach();
|
|
462
491
|
this.recordingNavWatcher = null;
|
|
463
492
|
}
|
|
493
|
+
// Cloud Run + NVENC path: ffmpeg has been recording the Xvfb framebuffer
|
|
494
|
+
// straight to MP4. stop() finalizes the moov atom; the file is already a
|
|
495
|
+
// playable H.264 (video-only) MP4. No JPEG concat needed.
|
|
496
|
+
if (this.recording.ffmpegRecorder) {
|
|
497
|
+
const ffmpegResult = await this.recording.ffmpegRecorder.stop();
|
|
498
|
+
logger.info(`[capture] Clip ffmpeg+nvenc capture: ${(ffmpegResult.durationMs / 1000).toFixed(2)}s wall, ` +
|
|
499
|
+
`trim ${Math.round(ffmpegResult.trimStartMs)}ms, output ${ffmpegResult.outputPath}`);
|
|
500
|
+
await this.browser.closeContext();
|
|
501
|
+
this.recording.finalized = true;
|
|
502
|
+
this.recording.ffmpegResult = ffmpegResult;
|
|
503
|
+
this.recording.sourcePath = ffmpegResult.outputPath;
|
|
504
|
+
this.recording.sourceMimeType = 'video/mp4';
|
|
505
|
+
this.recording.trimStartMs = ffmpegResult.trimStartMs;
|
|
506
|
+
this.recording.encodedDurationMs = await getMediaDurationMs(ffmpegResult.outputPath);
|
|
507
|
+
this.clipCursor = null;
|
|
508
|
+
const buffer = await fs.readFile(ffmpegResult.outputPath);
|
|
509
|
+
return {
|
|
510
|
+
buffer,
|
|
511
|
+
durationMs: this.recording.encodedDurationMs,
|
|
512
|
+
mimeType: 'video/mp4',
|
|
513
|
+
trimStartMs: ffmpegResult.trimStartMs,
|
|
514
|
+
};
|
|
515
|
+
}
|
|
516
|
+
if (!this.recording.loop) {
|
|
517
|
+
throw new Error('recording started without a loop or ffmpeg recorder');
|
|
518
|
+
}
|
|
464
519
|
const result = await this.recording.loop.stop();
|
|
465
520
|
logger.info(`[capture] Clip frame capture: ${result.frameCount} frame(s), ` +
|
|
466
521
|
`${result.measuredFps.toFixed(1)} fps over ${(result.actualDurationMs / 1000).toFixed(2)}s ` +
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Xvfb (X virtual framebuffer) process wrapper.
|
|
3
|
+
*
|
|
4
|
+
* Spins up a virtual X display that headed Chromium can render to. Used by
|
|
5
|
+
* cloud clip capture so the recording surface is reachable by ffmpeg via
|
|
6
|
+
* `x11grab` — bypassing the slow `Page.captureScreenshot` CDP path that
|
|
7
|
+
* software-rasterized Linux compositors cap at ~6 fps on heavy React UIs.
|
|
8
|
+
*
|
|
9
|
+
* Lifecycle: Xvfb runs for the entire browser process lifetime. ffmpeg
|
|
10
|
+
* recording starts/stops per BEGIN_CLIP/END_CLIP and grabs from the same
|
|
11
|
+
* display.
|
|
12
|
+
*/
|
|
13
|
+
/** Settings for the virtual X display started by `XvfbProcess`. */
export interface XvfbProcessOptions {
    /** X display number with no leading colon; `99` yields `DISPLAY=:99`. */
    displayNumber: number;
    /** Virtual screen width in pixels; expected to equal the Chromium window width. */
    width: number;
    /** Virtual screen height in pixels; expected to equal the Chromium window height. */
    height: number;
}
|
|
21
|
+
/** Owns one `Xvfb` child process bound to a fixed display number. */
export declare class XvfbProcess {
    /** Options supplied at construction. */
    private readonly opts;
    /** The Xvfb child process while it is running. */
    private process;
    /** Set once the child process has exited. */
    private exited;
    constructor(opts: XvfbProcessOptions);
    /** Display string in `DISPLAY` form, e.g. `:99`. */
    get display(): string;
    /**
     * Launches Xvfb and resolves once its Unix socket exists. Rejects if Xvfb
     * exits first or the socket does not appear before the timeout.
     */
    start(): Promise<void>;
    /** Terminates Xvfb; resolves immediately when it was never started. */
    stop(): Promise<void>;
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Xvfb (X virtual framebuffer) process wrapper.
|
|
3
|
+
*
|
|
4
|
+
* Spins up a virtual X display that headed Chromium can render to. Used by
|
|
5
|
+
* cloud clip capture so the recording surface is reachable by ffmpeg via
|
|
6
|
+
* `x11grab` — bypassing the slow `Page.captureScreenshot` CDP path that
|
|
7
|
+
* software-rasterized Linux compositors cap at ~6 fps on heavy React UIs.
|
|
8
|
+
*
|
|
9
|
+
* Lifecycle: Xvfb runs for the entire browser process lifetime. ffmpeg
|
|
10
|
+
* recording starts/stops per BEGIN_CLIP/END_CLIP and grabs from the same
|
|
11
|
+
* display.
|
|
12
|
+
*/
|
|
13
|
+
import { spawn } from 'node:child_process';
|
|
14
|
+
import fs from 'node:fs/promises';
|
|
15
|
+
import { logger } from './logger.js';
|
|
16
|
+
const XVFB_READY_TIMEOUT_MS = 5_000;
|
|
17
|
+
const XVFB_READY_POLL_MS = 50;
|
|
18
|
+
const XVFB_STOP_GRACE_MS = 2_000;
|
|
19
|
+
export class XvfbProcess {
|
|
20
|
+
opts;
|
|
21
|
+
process = null;
|
|
22
|
+
exited = false;
|
|
23
|
+
constructor(opts) {
|
|
24
|
+
this.opts = opts;
|
|
25
|
+
}
|
|
26
|
+
/** DISPLAY string suitable for `process.env.DISPLAY` (e.g. `:99`). */
|
|
27
|
+
get display() {
|
|
28
|
+
return `:${this.opts.displayNumber}`;
|
|
29
|
+
}
|
|
30
|
+
async start() {
|
|
31
|
+
if (this.process)
|
|
32
|
+
throw new Error('xvfb already started');
|
|
33
|
+
// -ac: no access control (any local client can connect)
|
|
34
|
+
// -screen 0 WxHxDEPTH: screen 0 sized to W×H at 24-bit color
|
|
35
|
+
// -nolisten tcp: only listen on the Unix socket (no network exposure)
|
|
36
|
+
// -dpi 96: pin DPI so CSS pixel sizing matches a typical monitor
|
|
37
|
+
const args = [
|
|
38
|
+
this.display,
|
|
39
|
+
'-ac',
|
|
40
|
+
'-screen', '0', `${this.opts.width}x${this.opts.height}x24`,
|
|
41
|
+
'-nolisten', 'tcp',
|
|
42
|
+
'-dpi', '96',
|
|
43
|
+
];
|
|
44
|
+
this.process = spawn('Xvfb', args, {
|
|
45
|
+
stdio: ['ignore', 'pipe', 'pipe'],
|
|
46
|
+
detached: false,
|
|
47
|
+
});
|
|
48
|
+
this.process.stderr?.on('data', (chunk) => {
|
|
49
|
+
const text = String(chunk).trim();
|
|
50
|
+
if (text)
|
|
51
|
+
logger.warn(`[xvfb] ${text}`);
|
|
52
|
+
});
|
|
53
|
+
this.process.on('exit', (code, signal) => {
|
|
54
|
+
this.exited = true;
|
|
55
|
+
if (code !== 0 && code !== null) {
|
|
56
|
+
logger.error(`[xvfb] exited unexpectedly: code=${code} signal=${signal}`);
|
|
57
|
+
}
|
|
58
|
+
});
|
|
59
|
+
this.process.on('error', (err) => {
|
|
60
|
+
logger.error(`[xvfb] spawn error: ${err.message}`);
|
|
61
|
+
});
|
|
62
|
+
// Xvfb signals readiness by creating its Unix socket. Polling that socket
|
|
63
|
+
// is more reliable than `setTimeout(500)` because cold container starts
|
|
64
|
+
// are unpredictable.
|
|
65
|
+
const socketPath = `/tmp/.X11-unix/X${this.opts.displayNumber}`;
|
|
66
|
+
const startedAt = Date.now();
|
|
67
|
+
while (Date.now() - startedAt < XVFB_READY_TIMEOUT_MS) {
|
|
68
|
+
if (this.exited) {
|
|
69
|
+
throw new Error('Xvfb exited before becoming ready — check stderr above');
|
|
70
|
+
}
|
|
71
|
+
try {
|
|
72
|
+
await fs.access(socketPath);
|
|
73
|
+
logger.info(`[xvfb] ready on display ${this.display} (${this.opts.width}×${this.opts.height}) ` +
|
|
74
|
+
`after ${Date.now() - startedAt}ms`);
|
|
75
|
+
return;
|
|
76
|
+
}
|
|
77
|
+
catch {
|
|
78
|
+
// socket not yet created — keep polling
|
|
79
|
+
}
|
|
80
|
+
await new Promise(r => setTimeout(r, XVFB_READY_POLL_MS));
|
|
81
|
+
}
|
|
82
|
+
throw new Error(`Xvfb did not become ready within ${XVFB_READY_TIMEOUT_MS}ms`);
|
|
83
|
+
}
|
|
84
|
+
async stop() {
|
|
85
|
+
if (!this.process)
|
|
86
|
+
return;
|
|
87
|
+
const proc = this.process;
|
|
88
|
+
this.process = null;
|
|
89
|
+
proc.kill('SIGTERM');
|
|
90
|
+
await new Promise(resolve => {
|
|
91
|
+
const timer = setTimeout(() => {
|
|
92
|
+
try {
|
|
93
|
+
proc.kill('SIGKILL');
|
|
94
|
+
}
|
|
95
|
+
catch { /* already dead */ }
|
|
96
|
+
resolve();
|
|
97
|
+
}, XVFB_STOP_GRACE_MS);
|
|
98
|
+
proc.on('exit', () => { clearTimeout(timer); resolve(); });
|
|
99
|
+
});
|
|
100
|
+
logger.info(`[xvfb] stopped (display ${this.display})`);
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
//# sourceMappingURL=xvfb-process.js.map
|