autokap 1.3.8 → 1.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/browser.d.ts CHANGED
@@ -96,6 +96,7 @@ export declare class Browser {
96
96
  private browser;
97
97
  private context;
98
98
  private page;
99
+ private xvfb;
99
100
  private elementMap;
100
101
  private akNodeIndex;
101
102
  private poolContext;
@@ -334,6 +335,13 @@ export declare class Browser {
334
335
  resizeViewport(width: number, height: number): Promise<void>;
335
336
  get currentPage(): Page;
336
337
  get browserContext(): BrowserContext;
338
+ /**
339
+ * DISPLAY string of the Xvfb virtual screen, when this browser was launched
340
+ * with one (cloud clip capture path). `null` for headless / Mac / Windows
341
+ * launches that don't use Xvfb. Consumed by `FfmpegX11Recorder` so it knows
342
+ * which display to grab.
343
+ */
344
+ get xvfbDisplay(): string | null;
337
345
  /**
338
346
  * Observation pass for mock data generation.
339
347
  * Navigates to the given URL, waits for network idle, and records all JSON API responses.
package/dist/browser.js CHANGED
@@ -98,6 +98,7 @@ function resolveEffectivePadding(config, bbox) {
98
98
  }
99
99
  import { CAPTURE_HIDE_STYLE_ID, dismissCookiesAndWidgets, ensureCaptureHideStyles, getCaptureHideCSS, } from './cookie-dismiss.js';
100
100
  import { CHROMIUM_ARGS, browserPool } from './browser-pool.js';
101
+ import { XvfbProcess } from './xvfb-process.js';
101
102
  import { isDebugEnabled, logger } from './logger.js';
102
103
  async function withHelperTimeout(label, timeoutMs, work) {
103
104
  if (!timeoutMs || timeoutMs <= 0) {
@@ -772,6 +773,7 @@ export class Browser {
772
773
  browser = null;
773
774
  context = null;
774
775
  page = null;
776
+ xvfb = null;
775
777
  elementMap = new Map();
776
778
  akNodeIndex = new Map();
777
779
  poolContext = false;
@@ -810,50 +812,63 @@ export class Browser {
810
812
  const instance = new Browser(options);
811
813
  const deviceScaleFactor = normalizeDeviceScaleFactor(options.deviceScaleFactor);
812
814
  // Enable GPU compositor on non-Linux platforms so Chrome can render
813
- // 2880×1800 without saturating the CPU. Linux cloud runners get
814
- // SwiftShader (multi-threaded software ANGLE) instead of `--disable-gpu`
815
- // because the default CPU rasterizer caps CDP screenshots at ~165 ms each
816
- // 6 fps clips on heavy React pages. SwiftShader's JIT'd SIMD raster
817
- // paths use Fly's 8 vCPUs properly (measured: ~5× faster compositor).
818
- // Non-cloud Linux (GitHub Actions free runners, 2 vCPU) stays on the
819
- // legacy CPU path — SwiftShader has thread-pool overhead that hurts on
820
- // tight CI machines.
821
- const isCloudRunner = process.env.AUTOKAP_CLOUD_RUNNER === '1';
822
- const isLinuxCloud = process.platform === 'linux' && isCloudRunner;
823
- const baseArgs = isLinuxCloud || process.platform !== 'linux'
824
- ? CHROMIUM_ARGS.filter(arg => arg !== '--disable-gpu' && arg !== '--disable-gpu-sandbox')
825
- : CHROMIUM_ARGS;
826
- // Pin ANGLE to the platform's fast graphics backend. Chrome's default
815
+ // 2880×1800 without saturating the CPU. Linux (Docker/CI) keeps
816
+ // `--disable-gpu` from CHROMIUM_ARGS because GPU is rarely available there.
817
+ // SwiftShader was attempted for cloud Linux (v1.3.8) but caused GPU
818
+ // process crashes mid-recording — Playwright's Jammy base lacks the GL
819
+ // libs SwiftShader expects. Reverted in v1.3.9.
820
+ const baseArgs = process.platform === 'linux'
821
+ ? CHROMIUM_ARGS
822
+ : CHROMIUM_ARGS.filter(arg => arg !== '--disable-gpu' && arg !== '--disable-gpu-sandbox');
823
+ // Pin ANGLE to the platform's native graphics API. Chrome's default
827
824
  // backend is OpenGL on macOS, which is far slower than Metal for the
828
825
  // compositor (measured 4 FPS vs 32 FPS at 2880×1800 on a heavy React UI).
829
- // Same story on Windows where D3D11 is the native fast path. On Linux
830
- // cloud, SwiftShader is the multi-threaded software backend.
826
+ // Same story on Windows where D3D11 is the native fast path.
831
827
  const angleArg = process.platform === 'darwin' ? '--use-angle=metal'
832
828
  : process.platform === 'win32' ? '--use-angle=d3d11'
833
- : isLinuxCloud ? '--use-angle=swiftshader'
834
- : null;
835
- // Cloud-Linux extras: route GL through ANGLE, opt in to SwiftShader
836
- // explicitly (Chromium 124+ requires `--enable-unsafe-swiftshader` since
837
- // SwiftShader was tagged unsafe for general WebGL for our headless
838
- // captures the security caveat is irrelevant), and bypass the GPU
839
- // blocklist that otherwise refuses any GPU path on unrecognized headless
840
- // hardware.
841
- const linuxCloudGpuArgs = isLinuxCloud ? [
842
- '--use-gl=angle',
843
- '--enable-unsafe-swiftshader',
844
- '--ignore-gpu-blocklist',
829
+ : null; // Linux: skip — GPU is rarely present in CI anyway
830
+ // Cloud Linux: spawn Xvfb and run Chromium headed against the virtual
831
+ // display. ffmpeg `x11grab` will then capture clips directly from Xvfb
832
+ // at a steady 30 fps, bypassing the slow CDP `Page.captureScreenshot`
833
+ // path that caps at ~6 fps under software rasterization. Headless
834
+ // Chromium on Mac/Windows local + Linux CI keeps using the CDP loop.
835
+ const useXvfb = process.platform === 'linux'
836
+ && process.env.AUTOKAP_CLOUD_RUNNER === '1'
837
+ && !options.headed;
838
+ if (useXvfb) {
839
+ instance.xvfb = new XvfbProcess({
840
+ displayNumber: 99,
841
+ width: Math.round(options.viewport.width),
842
+ height: Math.round(options.viewport.height),
843
+ });
844
+ await instance.xvfb.start();
845
+ // Chromium reads DISPLAY when launched non-headless — this directs
846
+ // rendering to the Xvfb framebuffer that ffmpeg will later capture.
847
+ process.env.DISPLAY = instance.xvfb.display;
848
+ logger.info(`[capture] Cloud clip capture: Chromium → Xvfb ${instance.xvfb.display} → ffmpeg x11grab path enabled`);
849
+ }
850
+ // Kiosk + zero-position anchor for Xvfb: Chromium normally renders its
851
+ // chrome (toolbar, address bar, tabs) above the page in headed mode.
852
+ // x11grab captures the whole screen, so the chrome would sit at the top
853
+ // of every clip. `--kiosk` removes all UI; `--window-position=0,0` and
854
+ // `--window-size` ensure the page fills the Xvfb screen exactly.
855
+ const xvfbWindowArgs = useXvfb ? [
856
+ '--kiosk',
857
+ '--window-position=0,0',
845
858
  ] : [];
846
859
  const clipArgs = [
847
860
  ...baseArgs,
848
861
  `--force-device-scale-factor=${deviceScaleFactor}`,
849
862
  `--window-size=${Math.round(options.viewport.width)},${Math.round(options.viewport.height)}`,
850
863
  ...(angleArg ? [angleArg] : []),
851
- ...linuxCloudGpuArgs,
864
+ ...xvfbWindowArgs,
852
865
  ];
853
866
  // Dedicated browser process for clip capture. Not pooled because clip
854
867
  // capture installs context-level init scripts (cursor overlay).
855
868
  instance.browser = await chromium.launch({
856
- headless: !options.headed,
869
+ // Headless: false when Xvfb is in play so Chromium actually renders
870
+ // pixels to the display (headless mode skips that work entirely).
871
+ headless: useXvfb ? false : !options.headed,
857
872
  args: clipArgs,
858
873
  });
859
874
  const contextOptions = {
@@ -1059,6 +1074,12 @@ export class Browser {
1059
1074
  this.context = null;
1060
1075
  this.page = null;
1061
1076
  }
1077
+ // Tear down Xvfb only after the browser process is gone — Chromium needs
1078
+ // a live display until it exits or it'll spam X errors on shutdown.
1079
+ if (this.xvfb) {
1080
+ await this.xvfb.stop();
1081
+ this.xvfb = null;
1082
+ }
1062
1083
  }
1063
1084
  async navigateTo(url) {
1064
1085
  const page = this.ensurePage();
@@ -5068,6 +5089,15 @@ export class Browser {
5068
5089
  get browserContext() {
5069
5090
  return this.ensureContext();
5070
5091
  }
5092
+ /**
5093
+ * DISPLAY string of the Xvfb virtual screen, when this browser was launched
5094
+ * with one (cloud clip capture path). `null` for headless / Mac / Windows
5095
+ * launches that don't use Xvfb. Consumed by `FfmpegX11Recorder` so it knows
5096
+ * which display to grab.
5097
+ */
5098
+ get xvfbDisplay() {
5099
+ return this.xvfb?.display ?? null;
5100
+ }
5071
5101
  /**
5072
5102
  * Observation pass for mock data generation.
5073
5103
  * Navigates to the given URL, waits for network idle, and records all JSON API responses.
@@ -0,0 +1,42 @@
1
+ /**
2
+ * ffmpeg x11grab recorder.
3
+ *
4
+ * Captures the Xvfb virtual display directly with `ffmpeg -f x11grab` at a
5
+ * fixed framerate, encoded straight to MP4 (libx264). Decouples the clip
6
+ * recording rate from Chromium's compositor speed — software-rasterized
7
+ * Linux compositors cap CDP `Page.captureScreenshot` at ~6 fps on heavy
8
+ * React UIs, which produces choppy clips. With x11grab we get a steady 30
9
+ * fps; if Chromium is slow to render, ffmpeg simply records the same frame
10
+ * twice (matches what a user would see on screen).
11
+ *
12
+ * Lifecycle: one recorder instance per BEGIN_CLIP/END_CLIP. Xvfb itself
13
+ * runs for the whole browser process lifetime.
14
+ */
15
+ export interface FfmpegX11RecorderOptions {
16
+ /** DISPLAY string (e.g. `:99`). */
17
+ display: string;
18
+ /** Capture region width in pixels. Should match Xvfb screen width. */
19
+ width: number;
20
+ /** Capture region height in pixels. Should match Xvfb screen height. */
21
+ height: number;
22
+ /** Target framerate. */
23
+ fps: number;
24
+ /** Absolute path to the output .mp4 file. */
25
+ outputPath: string;
26
+ }
27
+ export interface FfmpegX11RecorderResult {
28
+ outputPath: string;
29
+ trimStartMs: number;
30
+ durationMs: number;
31
+ }
32
+ export declare class FfmpegX11Recorder {
33
+ private readonly opts;
34
+ private process;
35
+ private startedAt;
36
+ private firstFrameAt;
37
+ private lastReportedFrameLine;
38
+ private stderrTail;
39
+ constructor(opts: FfmpegX11RecorderOptions);
40
+ start(): Promise<void>;
41
+ stop(): Promise<FfmpegX11RecorderResult>;
42
+ }
@@ -0,0 +1,167 @@
1
+ /**
2
+ * ffmpeg x11grab recorder.
3
+ *
4
+ * Captures the Xvfb virtual display directly with `ffmpeg -f x11grab` at a
5
+ * fixed framerate, encoded straight to MP4 (libx264). Decouples the clip
6
+ * recording rate from Chromium's compositor speed — software-rasterized
7
+ * Linux compositors cap CDP `Page.captureScreenshot` at ~6 fps on heavy
8
+ * React UIs, which produces choppy clips. With x11grab we get a steady 30
9
+ * fps; if Chromium is slow to render, ffmpeg simply records the same frame
10
+ * twice (matches what a user would see on screen).
11
+ *
12
+ * Lifecycle: one recorder instance per BEGIN_CLIP/END_CLIP. Xvfb itself
13
+ * runs for the whole browser process lifetime.
14
+ */
15
+ import { spawn } from 'node:child_process';
16
+ import fs from 'node:fs/promises';
17
+ import { logger } from './logger.js';
18
+ const FFMPEG_FIRST_FRAME_TIMEOUT_MS = 5_000;
19
+ const FFMPEG_FIRST_FRAME_POLL_MS = 50;
20
+ const FFMPEG_GRACEFUL_STOP_MS = 3_000;
21
+ const FFMPEG_FORCE_STOP_MS = 2_000;
22
+ export class FfmpegX11Recorder {
23
+ opts;
24
+ process = null;
25
+ startedAt = 0;
26
+ firstFrameAt = 0;
27
+ lastReportedFrameLine = null;
28
+ stderrTail = [];
29
+ constructor(opts) {
30
+ this.opts = opts;
31
+ }
32
+ async start() {
33
+ if (this.process)
34
+ throw new Error('ffmpeg x11grab already running');
35
+ const { display, width, height, fps, outputPath } = this.opts;
36
+ // -draw_mouse 0: hide the X cursor — the cursor overlay script paints a
37
+ // fake cursor in the DOM that's already captured via the page.
38
+ // -preset ultrafast + -crf 20: encode in real time on 8 vCPU; CRF 20 is
39
+ // high quality (clip artifacts are visible at 28+).
40
+ // -pix_fmt yuv420p + +faststart: maximum playback compatibility (Safari,
41
+ // QuickTime, browser <video>).
42
+ const args = [
43
+ '-y',
44
+ '-loglevel', 'warning',
45
+ '-stats',
46
+ '-f', 'x11grab',
47
+ '-draw_mouse', '0',
48
+ '-framerate', String(fps),
49
+ '-video_size', `${width}x${height}`,
50
+ '-i', `${display}.0+0,0`,
51
+ '-c:v', 'libx264',
52
+ '-preset', 'ultrafast',
53
+ '-crf', '20',
54
+ '-pix_fmt', 'yuv420p',
55
+ '-movflags', '+faststart',
56
+ outputPath,
57
+ ];
58
+ logger.info(`[ffmpeg-x11] starting capture on ${display} → ${outputPath} (${width}×${height} @ ${fps}fps)`);
59
+ this.startedAt = performance.now();
60
+ // stdin is `pipe` so we can send 'q' for graceful shutdown (writes the
61
+ // moov atom; SIGTERM produces an unplayable file).
62
+ this.process = spawn('ffmpeg', args, { stdio: ['pipe', 'pipe', 'pipe'] });
63
+ let exited = false;
64
+ let exitError = null;
65
+ this.process.stderr?.on('data', (chunk) => {
66
+ const text = String(chunk);
67
+ this.stderrTail.push(text);
68
+ // Cap retained stderr at ~10 KB to avoid unbounded memory growth on
69
+ // long recordings.
70
+ while (this.stderrTail.join('').length > 10_000) {
71
+ this.stderrTail.shift();
72
+ }
73
+ // ffmpeg's progress lines look like: `frame= 42 fps=30 q=23 size= ...`
74
+ // First non-zero `frame=` value signals capture is actually streaming.
75
+ if (this.firstFrameAt === 0 && /frame=\s*[1-9]/.test(text)) {
76
+ this.firstFrameAt = performance.now();
77
+ }
78
+ // Track the latest progress line for the final summary log.
79
+ const match = text.match(/frame=\s*\d+\s+fps=[\d.]+\s+[^\n]+/);
80
+ if (match)
81
+ this.lastReportedFrameLine = match[0].trim();
82
+ });
83
+ this.process.on('exit', (code, signal) => {
84
+ exited = true;
85
+ const wasGracefulStop = signal === 'SIGTERM' || signal === 'SIGINT' || code === 0;
86
+ if (!wasGracefulStop && code !== null) {
87
+ exitError = new Error(`ffmpeg exited unexpectedly: code=${code} signal=${signal}\n` +
88
+ `Last stderr:\n${this.stderrTail.join('').slice(-2_000)}`);
89
+ }
90
+ });
91
+ this.process.on('error', (err) => {
92
+ exitError = new Error(`ffmpeg spawn error: ${err.message}`);
93
+ });
94
+ // Wait for the first frame to confirm x11grab connected to Xvfb and
95
+ // encoding has begun. If ffmpeg dies before this, propagate the error.
96
+ const waitStartedAt = Date.now();
97
+ while (Date.now() - waitStartedAt < FFMPEG_FIRST_FRAME_TIMEOUT_MS) {
98
+ if (exited) {
99
+ throw exitError ?? new Error(`ffmpeg exited before first frame:\n${this.stderrTail.join('').slice(-2_000)}`);
100
+ }
101
+ if (this.firstFrameAt > 0) {
102
+ logger.info(`[ffmpeg-x11] capturing — first frame after ${Math.round(this.firstFrameAt - this.startedAt)}ms`);
103
+ return;
104
+ }
105
+ await new Promise(r => setTimeout(r, FFMPEG_FIRST_FRAME_POLL_MS));
106
+ }
107
+ throw new Error(`ffmpeg did not produce first frame within ${FFMPEG_FIRST_FRAME_TIMEOUT_MS}ms\n` +
108
+ `Last stderr:\n${this.stderrTail.join('').slice(-2_000)}`);
109
+ }
110
+ async stop() {
111
+ if (!this.process)
112
+ throw new Error('ffmpeg x11grab not running');
113
+ const proc = this.process;
114
+ this.process = null;
115
+ // 'q' → ffmpeg writes the moov atom and exits cleanly. SIGTERM/SIGKILL
116
+ // would corrupt the MP4 (no moov, unplayable in browsers).
117
+ try {
118
+ proc.stdin?.write('q');
119
+ proc.stdin?.end();
120
+ }
121
+ catch { /* stdin may already be closed */ }
122
+ await new Promise(resolve => {
123
+ const sigtermTimer = setTimeout(() => {
124
+ logger.warn(`[ffmpeg-x11] did not exit within ${FFMPEG_GRACEFUL_STOP_MS}ms — sending SIGTERM`);
125
+ try {
126
+ proc.kill('SIGTERM');
127
+ }
128
+ catch { /* already dead */ }
129
+ const sigkillTimer = setTimeout(() => {
130
+ try {
131
+ proc.kill('SIGKILL');
132
+ }
133
+ catch { /* already dead */ }
134
+ resolve();
135
+ }, FFMPEG_FORCE_STOP_MS);
136
+ proc.on('exit', () => { clearTimeout(sigkillTimer); resolve(); });
137
+ }, FFMPEG_GRACEFUL_STOP_MS);
138
+ proc.on('exit', () => { clearTimeout(sigtermTimer); resolve(); });
139
+ });
140
+ const stoppedAt = performance.now();
141
+ const trimStartMs = this.firstFrameAt > 0
142
+ ? Math.max(0, this.firstFrameAt - this.startedAt)
143
+ : 0;
144
+ const durationMs = stoppedAt - this.startedAt;
145
+ let fileSize = 0;
146
+ try {
147
+ const stat = await fs.stat(this.opts.outputPath);
148
+ fileSize = stat.size;
149
+ if (fileSize === 0) {
150
+ throw new Error(`ffmpeg produced 0-byte file at ${this.opts.outputPath}`);
151
+ }
152
+ }
153
+ catch (err) {
154
+ throw new Error(`ffmpeg output unreadable: ${err.message}\n` +
155
+ `Last stderr:\n${this.stderrTail.join('').slice(-2_000)}`);
156
+ }
157
+ logger.info(`[ffmpeg-x11] finalized: ${(fileSize / 1024).toFixed(1)} KB, ` +
158
+ `${(durationMs / 1000).toFixed(2)}s wall, trim ${Math.round(trimStartMs)}ms` +
159
+ (this.lastReportedFrameLine ? ` (${this.lastReportedFrameLine})` : ''));
160
+ return {
161
+ outputPath: this.opts.outputPath,
162
+ trimStartMs,
163
+ durationMs,
164
+ };
165
+ }
166
+ }
167
+ //# sourceMappingURL=ffmpeg-x11-recorder.js.map
@@ -12,6 +12,7 @@ import { resolveTarget } from './semantic-resolver.js';
12
12
  import { logger } from './logger.js';
13
13
  import { ClipCaptureLoop } from './clip-capture-loop.js';
14
14
  import { assembleMp4FromFrames, getMediaDurationMs } from './clip-postprocess.js';
15
+ import { FfmpegX11Recorder } from './ffmpeg-x11-recorder.js';
15
16
  export class WebPlaywrightLocal {
16
17
  browser;
17
18
  recordingDir;
@@ -352,21 +353,52 @@ export class WebPlaywrightLocal {
352
353
  const cloudClipFps = isCloudRunner ? 30 : defaultFps;
353
354
  const targetFps = options.captureFps
354
355
  ?? (options.mediaMode === 'video' ? 30 : cloudClipFps);
355
- const loop = new ClipCaptureLoop({
356
- page,
357
- framesDir,
358
- targetFps,
359
- // Cloud runners have CPU headroom drop the Linux 50 ms idle cushion
360
- // (sized for tight CI runners) to let the loop stay close to its target.
361
- minRestMs: process.platform === 'linux' && !isCloudRunner ? 50 : 16,
362
- });
363
- await loop.start();
356
+ // Cloud Linux clip recording goes through Xvfb + ffmpeg x11grab. The CDP
357
+ // `Page.captureScreenshot` loop caps at ~6 fps on heavy React UIs because
358
+ // each capture waits for a full compositor frame, and software-rasterized
359
+ // Linux compositors are slow. x11grab decouples capture rate from
360
+ // compositor speed: when Chromium is slow ffmpeg just records the same
361
+ // pixels twice (matches what a user would actually see). Other paths
362
+ // (Mac/Windows local, Linux CI) keep the CDP loop — it's faster there
363
+ // because Metal/D3D11/DRI compositors render at full rate.
364
+ const xvfbDisplay = this.browser.xvfbDisplay;
365
+ const useX11Capture = options.mediaMode === 'clip' && xvfbDisplay !== null;
366
+ let loop = null;
367
+ let x11Recorder = null;
368
+ const mp4Path = path.join(baseDir, `${options.mediaMode}.mp4`);
369
+ if (useX11Capture && xvfbDisplay) {
370
+ // Use the actual rendered surface size (CSS px × DPR) so x11grab and
371
+ // Chromium agree on dimensions. On cloud DPR is capped at 1 by
372
+ // cli-runner.ts, so this matches the viewport.
373
+ const surfaceW = Math.round(page.viewportSize()?.width ?? options.captureResolution?.width ?? 1440);
374
+ const surfaceH = Math.round(page.viewportSize()?.height ?? options.captureResolution?.height ?? 900);
375
+ x11Recorder = new FfmpegX11Recorder({
376
+ display: xvfbDisplay,
377
+ width: surfaceW,
378
+ height: surfaceH,
379
+ fps: targetFps,
380
+ outputPath: mp4Path,
381
+ });
382
+ await x11Recorder.start();
383
+ }
384
+ else {
385
+ loop = new ClipCaptureLoop({
386
+ page,
387
+ framesDir,
388
+ targetFps,
389
+ // Cloud runners have CPU headroom — drop the Linux 50 ms idle cushion
390
+ // (sized for tight CI runners) to let the loop stay close to its target.
391
+ minRestMs: process.platform === 'linux' && !isCloudRunner ? 50 : 16,
392
+ });
393
+ await loop.start();
394
+ }
364
395
  this.recording = {
365
396
  mediaMode: options.mediaMode,
366
397
  startedAt: Date.now(),
367
398
  framesDir,
368
- mp4Path: path.join(baseDir, `${options.mediaMode}.mp4`),
399
+ mp4Path,
369
400
  loop,
401
+ x11Recorder,
370
402
  finalized: false,
371
403
  };
372
404
  this.clipCursor = {
@@ -461,6 +493,32 @@ export class WebPlaywrightLocal {
461
493
  this.recordingNavWatcher.detach();
462
494
  this.recordingNavWatcher = null;
463
495
  }
496
+ // Cloud Linux clip capture path: ffmpeg already wrote the final MP4
497
+ // directly from x11grab, no frame assembly needed. Stop ffmpeg cleanly
498
+ // (`q` on stdin → moov atom is finalized) and surface the output.
499
+ if (this.recording.x11Recorder) {
500
+ const x11Result = await this.recording.x11Recorder.stop();
501
+ // Tear down the browser context AFTER ffmpeg stops — closing it sooner
502
+ // would freeze Chromium's last frame mid-paint and the tail of the clip
503
+ // would show a partial render.
504
+ await this.browser.closeContext();
505
+ this.recording.finalized = true;
506
+ this.recording.sourcePath = x11Result.outputPath;
507
+ this.recording.sourceMimeType = 'video/mp4';
508
+ this.recording.trimStartMs = x11Result.trimStartMs;
509
+ this.recording.encodedDurationMs = await getMediaDurationMs(x11Result.outputPath);
510
+ this.clipCursor = null;
511
+ const buffer = await fs.readFile(x11Result.outputPath);
512
+ return {
513
+ buffer,
514
+ durationMs: this.recording.encodedDurationMs,
515
+ mimeType: 'video/mp4',
516
+ trimStartMs: x11Result.trimStartMs,
517
+ };
518
+ }
519
+ if (!this.recording.loop) {
520
+ throw new Error('recording loop was not initialized');
521
+ }
464
522
  const result = await this.recording.loop.stop();
465
523
  logger.info(`[capture] Clip frame capture: ${result.frameCount} frame(s), ` +
466
524
  `${result.measuredFps.toFixed(1)} fps over ${(result.actualDurationMs / 1000).toFixed(2)}s ` +
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Xvfb (X virtual framebuffer) process wrapper.
3
+ *
4
+ * Spins up a virtual X display that headed Chromium can render to. Used by
5
+ * cloud clip capture so the recording surface is reachable by ffmpeg via
6
+ * `x11grab` — bypassing the slow `Page.captureScreenshot` CDP path that
7
+ * software-rasterized Linux compositors cap at ~6 fps on heavy React UIs.
8
+ *
9
+ * Lifecycle: Xvfb runs for the entire browser process lifetime. ffmpeg
10
+ * recording starts/stops per BEGIN_CLIP/END_CLIP and grabs from the same
11
+ * display.
12
+ */
13
+ export interface XvfbProcessOptions {
14
+ /** Display number (without leading colon). E.g. 99 → DISPLAY=:99. */
15
+ displayNumber: number;
16
+ /** Screen width in pixels. Should match the Chromium window size. */
17
+ width: number;
18
+ /** Screen height in pixels. Should match the Chromium window size. */
19
+ height: number;
20
+ }
21
+ export declare class XvfbProcess {
22
+ private readonly opts;
23
+ private process;
24
+ private exited;
25
+ constructor(opts: XvfbProcessOptions);
26
+ /** DISPLAY string suitable for `process.env.DISPLAY` (e.g. `:99`). */
27
+ get display(): string;
28
+ start(): Promise<void>;
29
+ stop(): Promise<void>;
30
+ }
@@ -0,0 +1,103 @@
1
+ /**
2
+ * Xvfb (X virtual framebuffer) process wrapper.
3
+ *
4
+ * Spins up a virtual X display that headed Chromium can render to. Used by
5
+ * cloud clip capture so the recording surface is reachable by ffmpeg via
6
+ * `x11grab` — bypassing the slow `Page.captureScreenshot` CDP path that
7
+ * software-rasterized Linux compositors cap at ~6 fps on heavy React UIs.
8
+ *
9
+ * Lifecycle: Xvfb runs for the entire browser process lifetime. ffmpeg
10
+ * recording starts/stops per BEGIN_CLIP/END_CLIP and grabs from the same
11
+ * display.
12
+ */
13
+ import { spawn } from 'node:child_process';
14
+ import fs from 'node:fs/promises';
15
+ import { logger } from './logger.js';
16
+ const XVFB_READY_TIMEOUT_MS = 5_000;
17
+ const XVFB_READY_POLL_MS = 50;
18
+ const XVFB_STOP_GRACE_MS = 2_000;
19
+ export class XvfbProcess {
20
+ opts;
21
+ process = null;
22
+ exited = false;
23
+ constructor(opts) {
24
+ this.opts = opts;
25
+ }
26
+ /** DISPLAY string suitable for `process.env.DISPLAY` (e.g. `:99`). */
27
+ get display() {
28
+ return `:${this.opts.displayNumber}`;
29
+ }
30
+ async start() {
31
+ if (this.process)
32
+ throw new Error('xvfb already started');
33
+ // -ac: no access control (any local client can connect)
34
+ // -screen 0 WxHxDEPTH: screen 0 sized to W×H at 24-bit color
35
+ // -nolisten tcp: only listen on the Unix socket (no network exposure)
36
+ // -dpi 96: pin DPI so CSS pixel sizing matches a typical monitor
37
+ const args = [
38
+ this.display,
39
+ '-ac',
40
+ '-screen', '0', `${this.opts.width}x${this.opts.height}x24`,
41
+ '-nolisten', 'tcp',
42
+ '-dpi', '96',
43
+ ];
44
+ this.process = spawn('Xvfb', args, {
45
+ stdio: ['ignore', 'pipe', 'pipe'],
46
+ detached: false,
47
+ });
48
+ this.process.stderr?.on('data', (chunk) => {
49
+ const text = String(chunk).trim();
50
+ if (text)
51
+ logger.warn(`[xvfb] ${text}`);
52
+ });
53
+ this.process.on('exit', (code, signal) => {
54
+ this.exited = true;
55
+ if (code !== 0 && code !== null) {
56
+ logger.error(`[xvfb] exited unexpectedly: code=${code} signal=${signal}`);
57
+ }
58
+ });
59
+ this.process.on('error', (err) => {
60
+ logger.error(`[xvfb] spawn error: ${err.message}`);
61
+ });
62
+ // Xvfb signals readiness by creating its Unix socket. Polling that socket
63
+ // is more reliable than `setTimeout(500)` because cold container starts
64
+ // are unpredictable.
65
+ const socketPath = `/tmp/.X11-unix/X${this.opts.displayNumber}`;
66
+ const startedAt = Date.now();
67
+ while (Date.now() - startedAt < XVFB_READY_TIMEOUT_MS) {
68
+ if (this.exited) {
69
+ throw new Error('Xvfb exited before becoming ready — check stderr above');
70
+ }
71
+ try {
72
+ await fs.access(socketPath);
73
+ logger.info(`[xvfb] ready on display ${this.display} (${this.opts.width}×${this.opts.height}) ` +
74
+ `after ${Date.now() - startedAt}ms`);
75
+ return;
76
+ }
77
+ catch {
78
+ // socket not yet created — keep polling
79
+ }
80
+ await new Promise(r => setTimeout(r, XVFB_READY_POLL_MS));
81
+ }
82
+ throw new Error(`Xvfb did not become ready within ${XVFB_READY_TIMEOUT_MS}ms`);
83
+ }
84
+ async stop() {
85
+ if (!this.process)
86
+ return;
87
+ const proc = this.process;
88
+ this.process = null;
89
+ proc.kill('SIGTERM');
90
+ await new Promise(resolve => {
91
+ const timer = setTimeout(() => {
92
+ try {
93
+ proc.kill('SIGKILL');
94
+ }
95
+ catch { /* already dead */ }
96
+ resolve();
97
+ }, XVFB_STOP_GRACE_MS);
98
+ proc.on('exit', () => { clearTimeout(timer); resolve(); });
99
+ });
100
+ logger.info(`[xvfb] stopped (display ${this.display})`);
101
+ }
102
+ }
103
+ //# sourceMappingURL=xvfb-process.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autokap",
3
- "version": "1.3.8",
3
+ "version": "1.3.9",
4
4
  "description": "AI-powered CLI tool for capturing clean screenshots of websites",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",