autokap 1.3.15 → 1.3.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/browser.d.ts CHANGED
@@ -99,6 +99,20 @@ export declare class Browser {
99
99
  private elementMap;
100
100
  private akNodeIndex;
101
101
  private poolContext;
102
+ /**
103
+ * Xvfb instance backing the headed Chromium used by clip capture on Cloud
104
+ * Run with NVIDIA L4. Set when forClipCapture spawns Xvfb; null otherwise
105
+ * (Mac/Win and local Linux use headless Playwright). Lifetime matches the
106
+ * browser process — closeContext() leaves it alone, close() tears it down.
107
+ */
108
+ private xvfb;
109
+ /** Public read access for the clip recorder (needs the DISPLAY string). */
110
+ get xvfbDisplay(): string | null;
111
+ /** Viewport size used for clip capture region (matches Xvfb screen). */
112
+ get viewport(): {
113
+ width: number;
114
+ height: number;
115
+ };
102
116
  constructor(options: BrowserOptions);
103
117
  /**
104
118
  * Create a Browser using the shared pool (server/web API mode).
package/dist/browser.js CHANGED
@@ -99,6 +99,7 @@ function resolveEffectivePadding(config, bbox) {
99
99
  import { CAPTURE_HIDE_STYLE_ID, dismissCookiesAndWidgets, ensureCaptureHideStyles, getCaptureHideCSS, } from './cookie-dismiss.js';
100
100
  import { CHROMIUM_ARGS, browserPool } from './browser-pool.js';
101
101
  import { isDebugEnabled, logger } from './logger.js';
102
+ import { XvfbProcess } from './xvfb-process.js';
102
103
  async function withHelperTimeout(label, timeoutMs, work) {
103
104
  if (!timeoutMs || timeoutMs <= 0) {
104
105
  return work();
@@ -775,6 +776,17 @@ export class Browser {
775
776
  elementMap = new Map();
776
777
  akNodeIndex = new Map();
777
778
  poolContext = false;
779
+ /**
780
+ * Xvfb instance backing the headed Chromium used by clip capture on Cloud
781
+ * Run with NVIDIA L4. Set when forClipCapture spawns Xvfb; null otherwise
782
+ * (Mac/Win and local Linux use headless Playwright). Lifetime matches the
783
+ * browser process — closeContext() leaves it alone, close() tears it down.
784
+ */
785
+ xvfb = null;
786
+ /** Public read access for the clip recorder (needs the DISPLAY string). */
787
+ get xvfbDisplay() { return this.xvfb?.display ?? null; }
788
+ /** Viewport size used for clip capture region (matches Xvfb screen). */
789
+ get viewport() { return this.options.viewport; }
778
790
  constructor(options) {
779
791
  this.options = options;
780
792
  }
@@ -850,17 +862,49 @@ export class Browser {
850
862
  '--enable-zero-copy',
851
863
  ]
852
864
  : [];
865
+ // Cloud Run Linux + GPU: spawn Xvfb so headed Chromium has a display to
866
+ // render into, and ffmpeg x11grab can capture that framebuffer directly
867
+ // (the FfmpegX11Recorder + h264_nvenc encoding path used by clip recording).
868
+ // The CDP Page.captureScreenshot path on cloud topped out at 9 fps even
869
+ // with the GPU compositor active because libjpeg-turbo CPU encode + CDP
870
+ // transport overhead pinned per-frame cost at ~85 ms. Capturing the X
871
+ // framebuffer with NVENC bypasses both costs.
872
+ const xvfbWidth = Math.round(options.viewport.width);
873
+ const xvfbHeight = Math.round(options.viewport.height);
874
+ if (isLinuxWithGpu) {
875
+ instance.xvfb = new XvfbProcess({
876
+ displayNumber: 99,
877
+ width: xvfbWidth,
878
+ height: xvfbHeight,
879
+ });
880
+ await instance.xvfb.start();
881
+ // Chromium picks up DISPLAY from the parent process env. ffmpeg will
882
+ // read the same display via FfmpegX11RecorderOptions.
883
+ process.env.DISPLAY = instance.xvfb.display;
884
+ logger.info(`[capture] Cloud clip capture: Chromium → Xvfb ${instance.xvfb.display} → ffmpeg x11grab + h264_nvenc path enabled`);
885
+ }
886
+ // Kiosk + zero-position anchor for Xvfb: Chromium normally renders its
887
+ // own toolbar/tabbar in headed mode, which would appear at the top of
888
+ // every clip. `--kiosk` removes all UI; `--window-position=0,0` and
889
+ // `--window-size` make the page fill the Xvfb screen exactly.
890
+ const xvfbWindowArgs = isLinuxWithGpu ? [
891
+ '--kiosk',
892
+ '--window-position=0,0',
893
+ ] : [];
853
894
  const clipArgs = [
854
895
  ...baseArgs,
855
896
  `--force-device-scale-factor=${deviceScaleFactor}`,
856
- `--window-size=${Math.round(options.viewport.width)},${Math.round(options.viewport.height)}`,
897
+ `--window-size=${xvfbWidth},${xvfbHeight}`,
857
898
  ...(angleArg ? [angleArg] : []),
858
899
  ...cloudGpuArgs,
900
+ ...xvfbWindowArgs,
859
901
  ];
860
902
  // Dedicated browser process for clip capture. Not pooled because clip
861
903
  // capture installs context-level init scripts (cursor overlay).
904
+ // Cloud Run with Xvfb: launch headed (headless: false) so Chromium
905
+ // renders to the Xvfb framebuffer that ffmpeg captures.
862
906
  instance.browser = await chromium.launch({
863
- headless: !options.headed,
907
+ headless: isLinuxWithGpu ? false : !options.headed,
864
908
  args: clipArgs,
865
909
  });
866
910
  const contextOptions = {
@@ -1096,6 +1140,17 @@ export class Browser {
1096
1140
  this.context = null;
1097
1141
  this.page = null;
1098
1142
  }
1143
+ // Tear down Xvfb only after Chromium is fully gone — Chromium needs the
1144
+ // X display for its own teardown (releasing GL contexts, X resources).
1145
+ if (this.xvfb) {
1146
+ try {
1147
+ await this.xvfb.stop();
1148
+ }
1149
+ catch (err) {
1150
+ logger.warn(`[xvfb] stop failed: ${err.message}`);
1151
+ }
1152
+ this.xvfb = null;
1153
+ }
1099
1154
  }
1100
1155
  async navigateTo(url) {
1101
1156
  const page = this.ensurePage();
@@ -45,7 +45,13 @@ export class ClipCaptureLoop {
45
45
  const targetFps = Math.max(1, Math.min(30, opts.targetFps ?? platformDefault));
46
46
  this.targetFps = targetFps;
47
47
  this.targetFrameIntervalMs = 1000 / targetFps;
48
- const linuxMinRest = isCloudRunner ? 16 : 50;
48
+ // minRestMs adds a forced sleep after each frame, intended to yield the
49
+ // event loop on slow runners. With Cloud Run + L4 GPU, CDP capture itself
50
+ // takes ~85ms, so ANY non-zero minRestMs caps observed FPS below the
51
+ // already-low CDP ceiling (16ms forced rest = 9.9 fps cap, matched the
52
+ // 1.3.15 plateau exactly). Drop to 0 on cloud — the await on the next
53
+ // CDP send already yields the loop.
54
+ const linuxMinRest = isCloudRunner ? 0 : 50;
49
55
  const platformMinRest = process.platform === 'linux' ? linuxMinRest : 16;
50
56
  this.minRestMs = Math.max(0, Math.min(250, opts.minRestMs ?? platformMinRest));
51
57
  }
@@ -0,0 +1,52 @@
1
+ /**
2
+ * ffmpeg x11grab recorder with NVIDIA NVENC hardware encoder.
3
+ *
4
+ * Captures the Xvfb virtual display directly with `ffmpeg -f x11grab` at a
5
+ * fixed framerate, encoded straight to MP4 via `h264_nvenc` running on the
6
+ * Cloud Run NVIDIA L4 GPU. Decouples clip recording from Chromium's
7
+ * compositor speed AND from the CPU JPEG encoder that capped the previous
8
+ * CDP `Page.captureScreenshot` path at ~9 fps.
9
+ *
10
+ * Why this works on Cloud Run when v1.3.10 (libx264) didn't:
11
+ * - v1.3.10 used CPU encoder (libx264) which competed with Chromium for
12
+ * vCPU on the same machine, giving 1.83 fps.
13
+ * - h264_nvenc offloads the entire encode to dedicated NVIDIA NVENC silicon
14
+ * on the L4 (separate from CUDA cores). Encode is essentially free CPU-side.
15
+ * - The L4 has 2× NVENC blocks and can sustain multiple 4K streams in
16
+ * parallel, so 1440x900@30fps is trivial.
17
+ *
18
+ * Lifecycle: one recorder instance per BEGIN_CLIP/END_CLIP. Xvfb itself
19
+ * runs for the whole browser process lifetime.
20
+ *
21
+ * Refs:
22
+ * - https://docs.cloud.google.com/run/docs/tutorials/video-encoding
23
+ * - https://docs.nvidia.com/video-technologies/video-codec-sdk/13.0/ffmpeg-with-nvidia-gpu/index.html
24
+ */
25
+ export interface FfmpegX11RecorderOptions {
26
+ /** DISPLAY string (e.g. `:99`). */
27
+ display: string;
28
+ /** Capture region width in pixels. Should match Xvfb screen width. */
29
+ width: number;
30
+ /** Capture region height in pixels. Should match Xvfb screen height. */
31
+ height: number;
32
+ /** Target framerate. */
33
+ fps: number;
34
+ /** Absolute path to the output .mp4 file. */
35
+ outputPath: string;
36
+ }
37
+ export interface FfmpegX11RecorderResult {
38
+ outputPath: string;
39
+ trimStartMs: number;
40
+ durationMs: number;
41
+ }
42
+ export declare class FfmpegX11Recorder {
43
+ private readonly opts;
44
+ private process;
45
+ private startedAt;
46
+ private firstFrameAt;
47
+ private lastReportedFrameLine;
48
+ private stderrTail;
49
+ constructor(opts: FfmpegX11RecorderOptions);
50
+ start(): Promise<void>;
51
+ stop(): Promise<FfmpegX11RecorderResult>;
52
+ }
@@ -0,0 +1,193 @@
1
+ /**
2
+ * ffmpeg x11grab recorder with NVIDIA NVENC hardware encoder.
3
+ *
4
+ * Captures the Xvfb virtual display directly with `ffmpeg -f x11grab` at a
5
+ * fixed framerate, encoded straight to MP4 via `h264_nvenc` running on the
6
+ * Cloud Run NVIDIA L4 GPU. Decouples clip recording from Chromium's
7
+ * compositor speed AND from the CPU JPEG encoder that capped the previous
8
+ * CDP `Page.captureScreenshot` path at ~9 fps.
9
+ *
10
+ * Why this works on Cloud Run when v1.3.10 (libx264) didn't:
11
+ * - v1.3.10 used CPU encoder (libx264) which competed with Chromium for
12
+ * vCPU on the same machine, giving 1.83 fps.
13
+ * - h264_nvenc offloads the entire encode to dedicated NVIDIA NVENC silicon
14
+ * on the L4 (separate from CUDA cores). Encode is essentially free CPU-side.
15
+ * - The L4 has 2× NVENC blocks and can sustain multiple 4K streams in
16
+ * parallel, so 1440x900@30fps is trivial.
17
+ *
18
+ * Lifecycle: one recorder instance per BEGIN_CLIP/END_CLIP. Xvfb itself
19
+ * runs for the whole browser process lifetime.
20
+ *
21
+ * Refs:
22
+ * - https://docs.cloud.google.com/run/docs/tutorials/video-encoding
23
+ * - https://docs.nvidia.com/video-technologies/video-codec-sdk/13.0/ffmpeg-with-nvidia-gpu/index.html
24
+ */
25
+ import { spawn } from 'node:child_process';
26
+ import fs from 'node:fs/promises';
27
+ import { logger } from './logger.js';
28
+ const FFMPEG_FIRST_FRAME_TIMEOUT_MS = 5_000;
29
+ const FFMPEG_FIRST_FRAME_POLL_MS = 50;
30
+ const FFMPEG_GRACEFUL_STOP_MS = 3_000;
31
+ const FFMPEG_FORCE_STOP_MS = 2_000;
32
+ export class FfmpegX11Recorder {
33
+ opts;
34
+ process = null;
35
+ startedAt = 0;
36
+ firstFrameAt = 0;
37
+ lastReportedFrameLine = null;
38
+ stderrTail = [];
39
+ constructor(opts) {
40
+ this.opts = opts;
41
+ }
42
+ async start() {
43
+ if (this.process)
44
+ throw new Error('ffmpeg x11grab already running');
45
+ const { display, width, height, fps, outputPath } = this.opts;
46
+ // -draw_mouse 0: hide the X cursor — the cursor overlay script paints a
47
+ // fake cursor in the DOM that's already captured via the page.
48
+ // -c:v h264_nvenc: NVIDIA hardware encoder. Eliminates CPU contention
49
+ // with Chromium that pinned v1.3.10 (libx264) at 1.83 fps.
50
+ // -preset p4 + -tune ll: NVENC preset 4 (balanced) with low-latency
51
+ // tuning. p1 is fastest but lower quality, p7 is highest quality but
52
+ // slower. p4 is the standard real-time recording preset per NVIDIA's
53
+ // FFmpeg+GPU guide. -tune ll skips B-frames and sets LL-friendly
54
+ // reference patterns so each frame can be encoded independently.
55
+ // -rc cbr -b:v 5M: constant bitrate 5 Mbps — produces smooth quality
56
+ // for screen content at 1440x900. CRF/CQ modes are CPU-side only;
57
+ // NVENC supports CBR/VBR/cqp.
58
+ // -pix_fmt yuv420p + +faststart: maximum playback compatibility (Safari,
59
+ // QuickTime, browser <video>). NVENC ingests RGBA from x11grab and
60
+ // converts internally.
61
+ const args = [
62
+ '-y',
63
+ '-loglevel', 'warning',
64
+ '-stats',
65
+ '-f', 'x11grab',
66
+ '-draw_mouse', '0',
67
+ '-framerate', String(fps),
68
+ '-video_size', `${width}x${height}`,
69
+ '-i', `${display}.0+0,0`,
70
+ '-c:v', 'h264_nvenc',
71
+ '-preset', 'p4',
72
+ '-tune', 'll',
73
+ '-rc', 'cbr',
74
+ '-b:v', '5M',
75
+ '-pix_fmt', 'yuv420p',
76
+ '-movflags', '+faststart',
77
+ outputPath,
78
+ ];
79
+ // The cloud-runner image ships ONE ffmpeg binary at /usr/local/bin built
80
+ // from BtbN/FFmpeg-Builds with both x11grab AND h264_nvenc enabled.
81
+ // AUTOKAP_FFMPEG_X11_BIN can override for local testing or alternate
82
+ // builds. Falls back to PATH ffmpeg.
83
+ const ffmpegBin = process.env.AUTOKAP_FFMPEG_X11_BIN || 'ffmpeg';
84
+ logger.info(`[ffmpeg-x11] starting capture on ${display} → ${outputPath} (${width}×${height} @ ${fps}fps, encoder=h264_nvenc, bin=${ffmpegBin})`);
85
+ this.startedAt = performance.now();
86
+ // stdin is `pipe` so we can send 'q' for graceful shutdown (writes the
87
+ // moov atom; SIGTERM produces an unplayable file).
88
+ this.process = spawn(ffmpegBin, args, { stdio: ['pipe', 'pipe', 'pipe'] });
89
+ let exited = false;
90
+ let exitError = null;
91
+ this.process.stderr?.on('data', (chunk) => {
92
+ const text = String(chunk);
93
+ this.stderrTail.push(text);
94
+ // Cap retained stderr at ~10 KB to avoid unbounded memory growth on
95
+ // long recordings.
96
+ while (this.stderrTail.join('').length > 10_000) {
97
+ this.stderrTail.shift();
98
+ }
99
+ // ffmpeg's progress lines look like: `frame= 42 fps=30 q=23 size= ...`
100
+ // First non-zero `frame=` value signals capture is actually streaming.
101
+ if (this.firstFrameAt === 0 && /frame=\s*[1-9]/.test(text)) {
102
+ this.firstFrameAt = performance.now();
103
+ }
104
+ // Track the latest progress line for the final summary log.
105
+ const match = text.match(/frame=\s*\d+\s+fps=[\d.]+\s+[^\n]+/);
106
+ if (match)
107
+ this.lastReportedFrameLine = match[0].trim();
108
+ });
109
+ this.process.on('exit', (code, signal) => {
110
+ exited = true;
111
+ const wasGracefulStop = signal === 'SIGTERM' || signal === 'SIGINT' || code === 0;
112
+ if (!wasGracefulStop && code !== null) {
113
+ exitError = new Error(`ffmpeg exited unexpectedly: code=${code} signal=${signal}\n` +
114
+ `Last stderr:\n${this.stderrTail.join('').slice(-2_000)}`);
115
+ }
116
+ });
117
+ this.process.on('error', (err) => {
118
+ exitError = new Error(`ffmpeg spawn error: ${err.message}`);
119
+ });
120
+ // Wait for the first frame to confirm x11grab connected to Xvfb and
121
+ // encoding has begun. If ffmpeg dies before this, propagate the error.
122
+ const waitStartedAt = Date.now();
123
+ while (Date.now() - waitStartedAt < FFMPEG_FIRST_FRAME_TIMEOUT_MS) {
124
+ if (exited) {
125
+ throw exitError ?? new Error(`ffmpeg exited before first frame:\n${this.stderrTail.join('').slice(-2_000)}`);
126
+ }
127
+ if (this.firstFrameAt > 0) {
128
+ logger.info(`[ffmpeg-x11] capturing — first frame after ${Math.round(this.firstFrameAt - this.startedAt)}ms`);
129
+ return;
130
+ }
131
+ await new Promise(r => setTimeout(r, FFMPEG_FIRST_FRAME_POLL_MS));
132
+ }
133
+ throw new Error(`ffmpeg did not produce first frame within ${FFMPEG_FIRST_FRAME_TIMEOUT_MS}ms\n` +
134
+ `Last stderr:\n${this.stderrTail.join('').slice(-2_000)}`);
135
+ }
136
+ async stop() {
137
+ if (!this.process)
138
+ throw new Error('ffmpeg x11grab not running');
139
+ const proc = this.process;
140
+ this.process = null;
141
+ // 'q' → ffmpeg writes the moov atom and exits cleanly. SIGTERM/SIGKILL
142
+ // would corrupt the MP4 (no moov, unplayable in browsers).
143
+ try {
144
+ proc.stdin?.write('q');
145
+ proc.stdin?.end();
146
+ }
147
+ catch { /* stdin may already be closed */ }
148
+ await new Promise(resolve => {
149
+ const sigtermTimer = setTimeout(() => {
150
+ logger.warn(`[ffmpeg-x11] did not exit within ${FFMPEG_GRACEFUL_STOP_MS}ms — sending SIGTERM`);
151
+ try {
152
+ proc.kill('SIGTERM');
153
+ }
154
+ catch { /* already dead */ }
155
+ const sigkillTimer = setTimeout(() => {
156
+ try {
157
+ proc.kill('SIGKILL');
158
+ }
159
+ catch { /* already dead */ }
160
+ resolve();
161
+ }, FFMPEG_FORCE_STOP_MS);
162
+ proc.on('exit', () => { clearTimeout(sigkillTimer); resolve(); });
163
+ }, FFMPEG_GRACEFUL_STOP_MS);
164
+ proc.on('exit', () => { clearTimeout(sigtermTimer); resolve(); });
165
+ });
166
+ const stoppedAt = performance.now();
167
+ const trimStartMs = this.firstFrameAt > 0
168
+ ? Math.max(0, this.firstFrameAt - this.startedAt)
169
+ : 0;
170
+ const durationMs = stoppedAt - this.startedAt;
171
+ let fileSize = 0;
172
+ try {
173
+ const stat = await fs.stat(this.opts.outputPath);
174
+ fileSize = stat.size;
175
+ if (fileSize === 0) {
176
+ throw new Error(`ffmpeg produced 0-byte file at ${this.opts.outputPath}`);
177
+ }
178
+ }
179
+ catch (err) {
180
+ throw new Error(`ffmpeg output unreadable: ${err.message}\n` +
181
+ `Last stderr:\n${this.stderrTail.join('').slice(-2_000)}`);
182
+ }
183
+ logger.info(`[ffmpeg-x11] finalized: ${(fileSize / 1024).toFixed(1)} KB, ` +
184
+ `${(durationMs / 1000).toFixed(2)}s wall, trim ${Math.round(trimStartMs)}ms` +
185
+ (this.lastReportedFrameLine ? ` (${this.lastReportedFrameLine})` : ''));
186
+ return {
187
+ outputPath: this.opts.outputPath,
188
+ trimStartMs,
189
+ durationMs,
190
+ };
191
+ }
192
+ }
193
+ //# sourceMappingURL=ffmpeg-x11-recorder.js.map
@@ -11,6 +11,7 @@ import { humanType, moveMouse, } from './mouse-animation.js';
11
11
  import { resolveTarget } from './semantic-resolver.js';
12
12
  import { logger } from './logger.js';
13
13
  import { ClipCaptureLoop } from './clip-capture-loop.js';
14
+ import { FfmpegX11Recorder } from './ffmpeg-x11-recorder.js';
14
15
  import { assembleMp4FromFrames, getMediaDurationMs } from './clip-postprocess.js';
15
16
  export class WebPlaywrightLocal {
16
17
  browser;
@@ -352,23 +353,51 @@ export class WebPlaywrightLocal {
352
353
  const cloudClipFps = isCloudRunner ? 30 : defaultFps;
353
354
  const targetFps = options.captureFps
354
355
  ?? (options.mediaMode === 'video' ? 30 : cloudClipFps);
355
- const loop = new ClipCaptureLoop({
356
- page,
357
- framesDir,
358
- targetFps,
359
- // Cloud runners have CPU headroom drop the Linux 50 ms idle cushion
360
- // (sized for tight CI runners) to let the loop stay close to its target.
361
- minRestMs: process.platform === 'linux' && !isCloudRunner ? 50 : 16,
362
- });
363
- await loop.start();
364
- this.recording = {
365
- mediaMode: options.mediaMode,
366
- startedAt: Date.now(),
367
- framesDir,
368
- mp4Path: path.join(baseDir, `${options.mediaMode}.mp4`),
369
- loop,
370
- finalized: false,
371
- };
356
+ const mp4Path = path.join(baseDir, `${options.mediaMode}.mp4`);
357
+ // Cloud Run + NVIDIA L4: capture the Xvfb framebuffer with ffmpeg x11grab
358
+ // and encode via h264_nvenc on the GPU. Bypasses both the CDP transport
359
+ // overhead and the libjpeg-turbo CPU encode that capped CDP screenshot
360
+ // capture at 9 fps. The Browser instance spawns Xvfb in forClipCapture
361
+ // when AUTOKAP_CLOUD_RUNNER=1; xvfbDisplay is the gating signal.
362
+ const xvfbDisplay = this.browser.xvfbDisplay;
363
+ if (isCloudRunner && xvfbDisplay) {
364
+ const viewport = this.browser.viewport;
365
+ const ffmpegRecorder = new FfmpegX11Recorder({
366
+ display: xvfbDisplay,
367
+ width: Math.round(viewport.width),
368
+ height: Math.round(viewport.height),
369
+ fps: targetFps,
370
+ outputPath: mp4Path,
371
+ });
372
+ await ffmpegRecorder.start();
373
+ this.recording = {
374
+ mediaMode: options.mediaMode,
375
+ startedAt: Date.now(),
376
+ framesDir,
377
+ mp4Path,
378
+ ffmpegRecorder,
379
+ finalized: false,
380
+ };
381
+ }
382
+ else {
383
+ const loop = new ClipCaptureLoop({
384
+ page,
385
+ framesDir,
386
+ targetFps,
387
+ // Cloud runners have CPU headroom — drop the Linux 50 ms idle cushion
388
+ // (sized for tight CI runners) to let the loop stay close to its target.
389
+ minRestMs: process.platform === 'linux' && !isCloudRunner ? 50 : 16,
390
+ });
391
+ await loop.start();
392
+ this.recording = {
393
+ mediaMode: options.mediaMode,
394
+ startedAt: Date.now(),
395
+ framesDir,
396
+ mp4Path,
397
+ loop,
398
+ finalized: false,
399
+ };
400
+ }
372
401
  this.clipCursor = {
373
402
  currentPosition: null,
374
403
  pace: options.mediaMode === 'video' ? 'natural' : 'fast',
@@ -461,6 +490,32 @@ export class WebPlaywrightLocal {
461
490
  this.recordingNavWatcher.detach();
462
491
  this.recordingNavWatcher = null;
463
492
  }
493
+ // Cloud Run + NVENC path: ffmpeg has been recording the Xvfb framebuffer
494
+ // straight to MP4. stop() finalizes the moov atom; the file is already a
495
+ // playable H.264 MP4 (video only — ffmpeg is given no audio input). No JPEG concat needed.
496
+ if (this.recording.ffmpegRecorder) {
497
+ const ffmpegResult = await this.recording.ffmpegRecorder.stop();
498
+ logger.info(`[capture] Clip ffmpeg+nvenc capture: ${(ffmpegResult.durationMs / 1000).toFixed(2)}s wall, ` +
499
+ `trim ${Math.round(ffmpegResult.trimStartMs)}ms, output ${ffmpegResult.outputPath}`);
500
+ await this.browser.closeContext();
501
+ this.recording.finalized = true;
502
+ this.recording.ffmpegResult = ffmpegResult;
503
+ this.recording.sourcePath = ffmpegResult.outputPath;
504
+ this.recording.sourceMimeType = 'video/mp4';
505
+ this.recording.trimStartMs = ffmpegResult.trimStartMs;
506
+ this.recording.encodedDurationMs = await getMediaDurationMs(ffmpegResult.outputPath);
507
+ this.clipCursor = null;
508
+ const buffer = await fs.readFile(ffmpegResult.outputPath);
509
+ return {
510
+ buffer,
511
+ durationMs: this.recording.encodedDurationMs,
512
+ mimeType: 'video/mp4',
513
+ trimStartMs: ffmpegResult.trimStartMs,
514
+ };
515
+ }
516
+ if (!this.recording.loop) {
517
+ throw new Error('recording started without a loop or ffmpeg recorder');
518
+ }
464
519
  const result = await this.recording.loop.stop();
465
520
  logger.info(`[capture] Clip frame capture: ${result.frameCount} frame(s), ` +
466
521
  `${result.measuredFps.toFixed(1)} fps over ${(result.actualDurationMs / 1000).toFixed(2)}s ` +
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Xvfb (X virtual framebuffer) process wrapper.
3
+ *
4
+ * Spins up a virtual X display that headed Chromium can render to. Used by
5
+ * cloud clip capture so the recording surface is reachable by ffmpeg via
6
+ * `x11grab` — bypassing the slow `Page.captureScreenshot` CDP path that
7
+ * software-rasterized Linux compositors cap at ~6 fps on heavy React UIs.
8
+ *
9
+ * Lifecycle: Xvfb runs for the entire browser process lifetime. ffmpeg
10
+ * recording starts/stops per BEGIN_CLIP/END_CLIP and grabs from the same
11
+ * display.
12
+ */
13
+ export interface XvfbProcessOptions {
14
+ /** Display number (without leading colon). E.g. 99 → DISPLAY=:99. */
15
+ displayNumber: number;
16
+ /** Screen width in pixels. Should match the Chromium window size. */
17
+ width: number;
18
+ /** Screen height in pixels. Should match the Chromium window size. */
19
+ height: number;
20
+ }
21
+ export declare class XvfbProcess {
22
+ private readonly opts;
23
+ private process;
24
+ private exited;
25
+ constructor(opts: XvfbProcessOptions);
26
+ /** DISPLAY string suitable for `process.env.DISPLAY` (e.g. `:99`). */
27
+ get display(): string;
28
+ start(): Promise<void>;
29
+ stop(): Promise<void>;
30
+ }
@@ -0,0 +1,103 @@
1
+ /**
2
+ * Xvfb (X virtual framebuffer) process wrapper.
3
+ *
4
+ * Spins up a virtual X display that headed Chromium can render to. Used by
5
+ * cloud clip capture so the recording surface is reachable by ffmpeg via
6
+ * `x11grab` — bypassing the slow `Page.captureScreenshot` CDP path that
7
+ * software-rasterized Linux compositors cap at ~6 fps on heavy React UIs.
8
+ *
9
+ * Lifecycle: Xvfb runs for the entire browser process lifetime. ffmpeg
10
+ * recording starts/stops per BEGIN_CLIP/END_CLIP and grabs from the same
11
+ * display.
12
+ */
13
+ import { spawn } from 'node:child_process';
14
+ import fs from 'node:fs/promises';
15
+ import { logger } from './logger.js';
16
+ const XVFB_READY_TIMEOUT_MS = 5_000;
17
+ const XVFB_READY_POLL_MS = 50;
18
+ const XVFB_STOP_GRACE_MS = 2_000;
19
+ export class XvfbProcess {
20
+ opts;
21
+ process = null;
22
+ exited = false;
23
+ constructor(opts) {
24
+ this.opts = opts;
25
+ }
26
+ /** DISPLAY string suitable for `process.env.DISPLAY` (e.g. `:99`). */
27
+ get display() {
28
+ return `:${this.opts.displayNumber}`;
29
+ }
30
+ async start() {
31
+ if (this.process)
32
+ throw new Error('xvfb already started');
33
+ // -ac: no access control (any local client can connect)
34
+ // -screen 0 WxHxDEPTH: screen 0 sized to W×H at 24-bit color
35
+ // -nolisten tcp: only listen on the Unix socket (no network exposure)
36
+ // -dpi 96: pin DPI so CSS pixel sizing matches a typical monitor
37
+ const args = [
38
+ this.display,
39
+ '-ac',
40
+ '-screen', '0', `${this.opts.width}x${this.opts.height}x24`,
41
+ '-nolisten', 'tcp',
42
+ '-dpi', '96',
43
+ ];
44
+ this.process = spawn('Xvfb', args, {
45
+ stdio: ['ignore', 'pipe', 'pipe'],
46
+ detached: false,
47
+ });
48
+ this.process.stderr?.on('data', (chunk) => {
49
+ const text = String(chunk).trim();
50
+ if (text)
51
+ logger.warn(`[xvfb] ${text}`);
52
+ });
53
+ this.process.on('exit', (code, signal) => {
54
+ this.exited = true;
55
+ if (code !== 0 && code !== null) {
56
+ logger.error(`[xvfb] exited unexpectedly: code=${code} signal=${signal}`);
57
+ }
58
+ });
59
+ this.process.on('error', (err) => {
60
+ logger.error(`[xvfb] spawn error: ${err.message}`);
61
+ });
62
+ // Xvfb signals readiness by creating its Unix socket. Polling that socket
63
+ // is more reliable than `setTimeout(500)` because cold container starts
64
+ // are unpredictable.
65
+ const socketPath = `/tmp/.X11-unix/X${this.opts.displayNumber}`;
66
+ const startedAt = Date.now();
67
+ while (Date.now() - startedAt < XVFB_READY_TIMEOUT_MS) {
68
+ if (this.exited) {
69
+ throw new Error('Xvfb exited before becoming ready — check stderr above');
70
+ }
71
+ try {
72
+ await fs.access(socketPath);
73
+ logger.info(`[xvfb] ready on display ${this.display} (${this.opts.width}×${this.opts.height}) ` +
74
+ `after ${Date.now() - startedAt}ms`);
75
+ return;
76
+ }
77
+ catch {
78
+ // socket not yet created — keep polling
79
+ }
80
+ await new Promise(r => setTimeout(r, XVFB_READY_POLL_MS));
81
+ }
82
+ throw new Error(`Xvfb did not become ready within ${XVFB_READY_TIMEOUT_MS}ms`);
83
+ }
84
+ async stop() {
85
+ if (!this.process)
86
+ return;
87
+ const proc = this.process;
88
+ this.process = null;
89
+ proc.kill('SIGTERM');
90
+ await new Promise(resolve => {
91
+ const timer = setTimeout(() => {
92
+ try {
93
+ proc.kill('SIGKILL');
94
+ }
95
+ catch { /* already dead */ }
96
+ resolve();
97
+ }, XVFB_STOP_GRACE_MS);
98
+ proc.on('exit', () => { clearTimeout(timer); resolve(); });
99
+ });
100
+ logger.info(`[xvfb] stopped (display ${this.display})`);
101
+ }
102
+ }
103
+ //# sourceMappingURL=xvfb-process.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autokap",
3
- "version": "1.3.15",
3
+ "version": "1.3.17",
4
4
  "description": "AI-powered CLI tool for capturing clean screenshots of websites",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",