autokap 1.3.6 → 1.3.8

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/browser.d.ts CHANGED
@@ -96,7 +96,6 @@ export declare class Browser {
96
96
  private browser;
97
97
  private context;
98
98
  private page;
99
- private nativeVideoStartedAt;
100
99
  private elementMap;
101
100
  private akNodeIndex;
102
101
  private poolContext;
@@ -111,15 +110,15 @@ export declare class Browser {
111
110
  * Create a Browser dedicated to clip capture. Frames are pulled via CDP
112
111
  * `Page.captureScreenshot` in a tight loop by `ClipCaptureLoop` — NOT via
113
112
  * Playwright's built-in `recordVideo` (which plateaus at 27 FPS with ~12%
114
- * duplicates due to the CDP screencast throttler).
113
+ * duplicates due to the CDP screencast throttler, and on cloud Linux runs
114
+ * the WebM encoder on the compositor thread → frame drops to ~4 fps under
115
+ * software rasterization).
115
116
  *
116
117
  * Preserves the HiDPI rendering path (`--force-device-scale-factor` +
117
118
  * `--window-size`) so the captured frames match viewport × DSF pixels, and
118
119
  * the cursor overlay script so clicks/hover moments are visible.
119
120
  */
120
- static forClipCapture(options: BrowserOptions, cursorScript: string, recording?: {
121
- nativeVideoDir?: string;
122
- }): Promise<Browser>;
121
+ static forClipCapture(options: BrowserOptions, cursorScript: string): Promise<Browser>;
123
122
  /**
124
123
  * Close only the browser context (not the browser process).
125
124
  * Used by clip capture to release the context promptly after the CDP loop
@@ -335,7 +334,6 @@ export declare class Browser {
335
334
  resizeViewport(width: number, height: number): Promise<void>;
336
335
  get currentPage(): Page;
337
336
  get browserContext(): BrowserContext;
338
- get nativeVideoStartTime(): number | null;
339
337
  /**
340
338
  * Observation pass for mock data generation.
341
339
  * Navigates to the given URL, waits for network idle, and records all JSON API responses.
package/dist/browser.js CHANGED
@@ -772,7 +772,6 @@ export class Browser {
772
772
  browser = null;
773
773
  context = null;
774
774
  page = null;
775
- nativeVideoStartedAt = null;
776
775
  elementMap = new Map();
777
776
  akNodeIndex = new Map();
778
777
  poolContext = false;
@@ -799,33 +798,57 @@ export class Browser {
799
798
  * Create a Browser dedicated to clip capture. Frames are pulled via CDP
800
799
  * `Page.captureScreenshot` in a tight loop by `ClipCaptureLoop` — NOT via
801
800
  * Playwright's built-in `recordVideo` (which plateaus at 27 FPS with ~12%
802
- * duplicates due to the CDP screencast throttler).
801
+ * duplicates due to the CDP screencast throttler, and on cloud Linux runs
802
+ * the WebM encoder on the compositor thread → frame drops to ~4 fps under
803
+ * software rasterization).
803
804
  *
804
805
  * Preserves the HiDPI rendering path (`--force-device-scale-factor` +
805
806
  * `--window-size`) so the captured frames match viewport × DSF pixels, and
806
807
  * the cursor overlay script so clicks/hover moments are visible.
807
808
  */
808
- static async forClipCapture(options, cursorScript, recording) {
809
+ static async forClipCapture(options, cursorScript) {
809
810
  const instance = new Browser(options);
810
811
  const deviceScaleFactor = normalizeDeviceScaleFactor(options.deviceScaleFactor);
811
812
  // Enable GPU compositor on non-Linux platforms so Chrome can render
812
- // 2880×1800 without saturating the CPU. Linux (Docker/CI) keeps
813
- // `--disable-gpu` from CHROMIUM_ARGS because GPU is rarely available there.
814
- const baseArgs = process.platform === 'linux'
815
- ? CHROMIUM_ARGS
816
- : CHROMIUM_ARGS.filter(arg => arg !== '--disable-gpu' && arg !== '--disable-gpu-sandbox');
817
- // Pin ANGLE to the platform's native graphics API. Chrome's default
813
+ // 2880×1800 without saturating the CPU. Linux cloud runners get
814
+ // SwiftShader (multi-threaded software ANGLE) instead of `--disable-gpu`
815
+ // because the default CPU rasterizer caps CDP screenshots at ~165 ms each
816
+ // → 6 fps clips on heavy React pages. SwiftShader's JIT'd SIMD raster
817
+ // paths use Fly's 8 vCPUs properly (measured: ~5× faster compositor).
818
+ // Non-cloud Linux (GitHub Actions free runners, 2 vCPU) stays on the
819
+ // legacy CPU path — SwiftShader has thread-pool overhead that hurts on
820
+ // tight CI machines.
821
+ const isCloudRunner = process.env.AUTOKAP_CLOUD_RUNNER === '1';
822
+ const isLinuxCloud = process.platform === 'linux' && isCloudRunner;
823
+ const baseArgs = isLinuxCloud || process.platform !== 'linux'
824
+ ? CHROMIUM_ARGS.filter(arg => arg !== '--disable-gpu' && arg !== '--disable-gpu-sandbox')
825
+ : CHROMIUM_ARGS;
826
+ // Pin ANGLE to the platform's fast graphics backend. Chrome's default
818
827
  // backend is OpenGL on macOS, which is far slower than Metal for the
819
828
  // compositor (measured 4 FPS vs 32 FPS at 2880×1800 on a heavy React UI).
820
- // Same story on Windows where D3D11 is the native fast path.
829
+ // Same story on Windows where D3D11 is the native fast path. On Linux
830
+ // cloud, SwiftShader is the multi-threaded software backend.
821
831
  const angleArg = process.platform === 'darwin' ? '--use-angle=metal'
822
832
  : process.platform === 'win32' ? '--use-angle=d3d11'
823
- : null; // Linux: skip — GPU is rarely present in CI anyway
833
+ : isLinuxCloud ? '--use-angle=swiftshader'
834
+ : null;
835
+ // Cloud-Linux extras: route GL through ANGLE, opt in to SwiftShader
836
+ // explicitly (Chromium 124+ requires `--enable-unsafe-swiftshader` since
837
+ // SwiftShader was tagged unsafe for general WebGL — for our headless
838
+ // captures the security caveat is irrelevant), and bypass the GPU
839
+ // blocklist that otherwise refuses any GPU path on unrecognized headless
840
+ // hardware.
841
+ const linuxCloudGpuArgs = isLinuxCloud ? [
842
+ '--use-gl=angle',
843
+ '--enable-unsafe-swiftshader',
844
+ '--ignore-gpu-blocklist',
845
+ ] : [];
824
846
  const clipArgs = [
825
847
  ...baseArgs,
826
848
  `--force-device-scale-factor=${deviceScaleFactor}`,
827
849
  `--window-size=${Math.round(options.viewport.width)},${Math.round(options.viewport.height)}`,
828
850
  ...(angleArg ? [angleArg] : []),
851
+ ...linuxCloudGpuArgs,
829
852
  ];
830
853
  // Dedicated browser process for clip capture. Not pooled because clip
831
854
  // capture installs context-level init scripts (cursor overlay).
@@ -840,15 +863,6 @@ export class Browser {
840
863
  colorScheme: options.colorScheme ?? 'light',
841
864
  storageState: options.storageState,
842
865
  };
843
- if (recording?.nativeVideoDir) {
844
- contextOptions.recordVideo = {
845
- dir: recording.nativeVideoDir,
846
- size: {
847
- width: Math.round(options.viewport.width),
848
- height: Math.round(options.viewport.height),
849
- },
850
- };
851
- }
852
866
  instance.context = await instance.browser.newContext(contextOptions);
853
867
  // Inject cursor overlay at context level — survives all navigations in this session
854
868
  await instance.context.addInitScript(cursorScript);
@@ -874,11 +888,7 @@ export class Browser {
874
888
  document.addEventListener('DOMContentLoaded', install, { once: true });
875
889
  }
876
890
  }, { styleId: CAPTURE_HIDE_STYLE_ID, css: getCaptureHideCSS() });
877
- const nativeVideoStartedAt = Date.now();
878
891
  instance.page = await instance.context.newPage();
879
- if (recording?.nativeVideoDir) {
880
- instance.nativeVideoStartedAt = nativeVideoStartedAt;
881
- }
882
892
  return instance;
883
893
  }
884
894
  /**
@@ -5058,9 +5068,6 @@ export class Browser {
5058
5068
  get browserContext() {
5059
5069
  return this.ensureContext();
5060
5070
  }
5061
- get nativeVideoStartTime() {
5062
- return this.nativeVideoStartedAt;
5063
- }
5064
5071
  /**
5065
5072
  * Observation pass for mock data generation.
5066
5073
  * Navigates to the given URL, waits for network idle, and records all JSON API responses.
@@ -241,13 +241,7 @@ export async function runCapture(options) {
241
241
  }
242
242
  if (recordable) {
243
243
  recordingDir = await fs.mkdtemp(path.join(os.tmpdir(), `autokap-${program.mediaMode}-`));
244
- const nativeCloudClipRecording = program.mediaMode === 'clip'
245
- && process.env.AUTOKAP_CLOUD_RUNNER === '1'
246
- && process.env.AUTOKAP_CLIP_RECORDER !== 'cdp';
247
- if (nativeCloudClipRecording) {
248
- logger.info('[capture] Cloud clip recorder: native Playwright video (CDP screenshot loop disabled)');
249
- }
250
- browser = await Browser.forClipCapture(browserOptions, buildCursorOverlayScript(program.artifactPlan.cursorTheme ?? 'minimal'), nativeCloudClipRecording ? { nativeVideoDir: recordingDir } : undefined);
244
+ browser = await Browser.forClipCapture(browserOptions, buildCursorOverlayScript(program.artifactPlan.cursorTheme ?? 'minimal'));
251
245
  }
252
246
  else if (browserOptions.headed) {
253
247
  // Headed mode: standalone browser (pool is always headless)
package/dist/cli.js CHANGED
@@ -316,19 +316,31 @@ program
316
316
  return;
317
317
  lastProgressCheckpointAt = now;
318
318
  }
319
+ // Surface failures loudly to stderr (logger.error → fly.io machine logs).
320
+ // Cloud→cloud HTTP can fail silently for many reasons (DNS, firewall,
321
+ // expired token, dashboard cold-start) and dropping a single checkpoint
322
+ // produces a "stuck progress bar" symptom on the dashboard with no clue.
323
+ // The URL + status + error code are the minimum needed to debug from
324
+ // `flyctl logs` after the fact.
325
+ const checkpointType = typeof body.type === 'string' ? body.type : 'unknown';
319
326
  try {
320
327
  const response = await fetch(checkpointUrl, {
321
328
  method: 'POST',
322
329
  headers: { ...authHeaders(config), 'Content-Type': 'application/json' },
323
330
  body: JSON.stringify(body),
331
+ signal: AbortSignal.timeout(15_000),
324
332
  });
325
333
  if (!response.ok) {
326
334
  const bodyText = await response.text().catch(() => response.statusText);
327
- logger.warn(`[auto-recapture] Cloud checkpoint non-OK (${response.status}): ${bodyText}`);
335
+ logger.error(`[auto-recapture] Cloud checkpoint POST failed: status=${response.status} ` +
336
+ `type=${checkpointType} url=${checkpointUrl} body=${bodyText.slice(0, 200)}`);
328
337
  }
329
338
  }
330
339
  catch (err) {
331
- logger.warn(`[auto-recapture] Cloud checkpoint failed (best-effort): ${err.message}`);
340
+ const error = err;
341
+ const code = error.code ?? error.name ?? 'unknown';
342
+ logger.error(`[auto-recapture] Cloud checkpoint POST errored: code=${code} ` +
343
+ `type=${checkpointType} url=${checkpointUrl} message=${error.message}`);
332
344
  }
333
345
  };
334
346
  /**
@@ -357,7 +369,51 @@ program
357
369
  logger.warn(`[auto-recapture] Cloud callback failed (best-effort): ${err.message}`);
358
370
  }
359
371
  };
360
- const data = await requestJson(config, `/api/cli/projects/${opts.project}/auto-recapture-presets`, { headers: authHeaders(config) }, 'Failed to list auto-recapture presets');
372
+ // Emit a CLI-side "booted" checkpoint before any other network call.
373
+ // The orchestrator already wrote a `run_start` "cloud runner machine
374
+ // started" event when it submitted the Fly machine — that event proves
375
+ // the machine was *scheduled*, not that the container actually booted.
376
+ // Emitting this from inside the container (and BEFORE the presets fetch)
377
+ // is the dashboard's only proof the CLI is alive. If this never arrives,
378
+ // the issue is container boot or fly→dashboard connectivity, not the
379
+ // capture pipeline.
380
+ const cliVersionForCheckpoint = process.env.AUTOKAP_CLI_VERSION ?? version;
381
+ await postCloudCheckpoint({
382
+ type: 'run_start',
383
+ status: 'running',
384
+ message: `CLI booted on cloud runner (autokap@${cliVersionForCheckpoint}) — fetching plan`,
385
+ });
386
+ // Fetch the presets list with a hard timeout. Without this, a slow or
387
+ // unreachable dashboard would leave the CLI hanging forever — the
388
+ // dashboard would stay stuck at "machine started" with no error surfaced.
389
+ const presetsPath = `/api/cli/projects/${opts.project}/auto-recapture-presets`;
390
+ let data;
391
+ try {
392
+ const response = await fetch(buildApiUrl(config, presetsPath), {
393
+ headers: authHeaders(config),
394
+ signal: AbortSignal.timeout(30_000),
395
+ });
396
+ if (!response.ok) {
397
+ const errorBody = await readApiError(response);
398
+ throw new Error(`HTTP ${response.status}: ${errorBody}`);
399
+ }
400
+ data = await response.json();
401
+ }
402
+ catch (error) {
403
+ const err = error;
404
+ const isTimeout = err.name === 'TimeoutError' || err.name === 'AbortError';
405
+ const reason = isTimeout
406
+ ? `presets fetch timed out after 30s — dashboard unreachable or unresponsive`
407
+ : `presets fetch failed: ${err.code ? `${err.code} ` : ''}${err.message}`;
408
+ await postCloudCheckpoint({
409
+ type: 'error',
410
+ status: 'failed',
411
+ message: reason,
412
+ errorMessage: reason,
413
+ });
414
+ await notifyCloudCallback('failed', { totalPresets: 0, failedPresets: 0, errorMessage: reason });
415
+ fatal(`Failed to list auto-recapture presets: ${reason}`);
416
+ }
361
417
  await postCloudCheckpoint({
362
418
  type: 'run_plan',
363
419
  totalPresets: data.presets.length,
@@ -352,32 +352,21 @@ export class WebPlaywrightLocal {
352
352
  const cloudClipFps = isCloudRunner ? 30 : defaultFps;
353
353
  const targetFps = options.captureFps
354
354
  ?? (options.mediaMode === 'video' ? 30 : cloudClipFps);
355
- const nativeClipVideo = options.mediaMode === 'clip' && isCloudRunner
356
- ? page.video()
357
- : null;
358
- let loop = null;
359
- if (nativeClipVideo) {
360
- logger.info('[capture] Native cloud clip recorder enabled using Playwright video stream instead of CDP screenshots');
361
- }
362
- else {
363
- loop = new ClipCaptureLoop({
364
- page,
365
- framesDir,
366
- targetFps,
367
- // Cloud runners have CPU headroom — drop the Linux 50 ms idle cushion
368
- // (sized for tight CI runners) to let the loop stay close to its target.
369
- minRestMs: process.platform === 'linux' && !isCloudRunner ? 50 : 16,
370
- });
371
- await loop.start();
372
- }
355
+ const loop = new ClipCaptureLoop({
356
+ page,
357
+ framesDir,
358
+ targetFps,
359
+ // Cloud runners have CPU headroom — drop the Linux 50 ms idle cushion
360
+ // (sized for tight CI runners) to let the loop stay close to its target.
361
+ minRestMs: process.platform === 'linux' && !isCloudRunner ? 50 : 16,
362
+ });
363
+ await loop.start();
373
364
  this.recording = {
374
365
  mediaMode: options.mediaMode,
375
366
  startedAt: Date.now(),
376
367
  framesDir,
377
368
  mp4Path: path.join(baseDir, `${options.mediaMode}.mp4`),
378
369
  loop,
379
- nativeVideo: nativeClipVideo,
380
- nativeVideoStartedAt: this.browser.nativeVideoStartTime,
381
370
  finalized: false,
382
371
  };
383
372
  this.clipCursor = {
@@ -472,32 +461,6 @@ export class WebPlaywrightLocal {
472
461
  this.recordingNavWatcher.detach();
473
462
  this.recordingNavWatcher = null;
474
463
  }
475
- if (this.recording.nativeVideo) {
476
- const video = this.recording.nativeVideo;
477
- const trimStartMs = Math.max(0, this.recording.startedAt
478
- - (this.recording.nativeVideoStartedAt ?? this.sessionStartedAt));
479
- await this.browser.closeContext();
480
- const videoPath = await video.path();
481
- const durationMs = await getMediaDurationMs(videoPath);
482
- logger.info(`[capture] Native clip recording finalized: source ${durationMs}ms, ` +
483
- `trim start ${Math.round(trimStartMs)}ms`);
484
- this.recording.finalized = true;
485
- this.recording.sourcePath = videoPath;
486
- this.recording.sourceMimeType = 'video/webm';
487
- this.recording.trimStartMs = trimStartMs;
488
- this.recording.encodedDurationMs = durationMs;
489
- this.clipCursor = null;
490
- const buffer = await fs.readFile(videoPath);
491
- return {
492
- buffer,
493
- durationMs,
494
- mimeType: 'video/webm',
495
- trimStartMs,
496
- };
497
- }
498
- if (!this.recording.loop) {
499
- throw new Error('recording loop was not initialized');
500
- }
501
464
  const result = await this.recording.loop.stop();
502
465
  logger.info(`[capture] Clip frame capture: ${result.frameCount} frame(s), ` +
503
466
  `${result.measuredFps.toFixed(1)} fps over ${(result.actualDurationMs / 1000).toFixed(2)}s ` +
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autokap",
3
- "version": "1.3.6",
3
+ "version": "1.3.8",
4
4
  "description": "AI-powered CLI tool for capturing clean screenshots of websites",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",