autokap 1.5.2 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  import { chromium } from 'playwright';
2
2
  import { logger } from './logger.js';
3
+ import { ensureChromiumInstalled } from './playwright-installer.js';
3
4
  /**
4
5
  * Opens a headed Chromium window, lets the user log in, and uploads the
5
6
  * resulting storageState (cookies + localStorage, HttpOnly included) to the
@@ -11,6 +12,7 @@ import { logger } from './logger.js';
11
12
  */
12
13
  export async function captureAuthSession(options) {
13
14
  const { apiBaseUrl, apiKey, projectId, accountId, startUrl } = options;
15
+ await ensureChromiumInstalled();
14
16
  logger.info('[auth] Launching Chromium…');
15
17
  const browser = await chromium.launch({ headless: false });
16
18
  let context = null;
@@ -58,6 +58,13 @@ export interface VideoClipMetadata {
58
58
  * SFX in the video compositor.
59
59
  */
60
60
  keystrokeOffsetsMs?: number[];
61
+ /**
62
+ * For CLICK / DOUBLE_CLICK / CHECK opcodes captured in clipCursor mode:
63
+ * clip-relative ms timestamp of each actual click dispatched by Playwright
64
+ * (measured AFTER the cursor animation settled). Drives mouse SFX in
65
+ * sync with the visible click.
66
+ */
67
+ clickOffsetsMs?: number[];
61
68
  }>;
62
69
  }
63
70
  export interface VideoAudioAsset {
@@ -19,6 +19,7 @@ import { Browser } from './browser.js';
19
19
  import { API_BASE_URL_ENV_VAR, requireConfig } from './cli-config.js';
20
20
  import { WebPlaywrightLocal } from './web-playwright-local.js';
21
21
  import { executeProgram } from './opcode-runner.js';
22
+ import { ensureChromiumInstalled } from './playwright-installer.js';
22
23
  import { RecoveryChainImpl } from './recovery-chain.js';
23
24
  import { parseProgram } from './execution-schema.js';
24
25
  import { buildCursorOverlayScript } from './cursor-overlay-script.js';
@@ -108,6 +109,18 @@ function normalizeNumericScale(value) {
108
109
  const HEALER_SYSTEM_PROMPT = 'You repair failed deterministic browser opcodes. Respond only with JSON.';
109
110
  // ── Main entry point ────────────────────────────────────────────────
110
111
  export async function runCapture(options) {
112
+ // Self-heal a missing Playwright Chromium binary BEFORE anything else.
113
+ // Skipped postinstalls or downstream Playwright version bumps would
114
+ // otherwise surface as a cryptic launch failure mid-capture.
115
+ try {
116
+ await ensureChromiumInstalled();
117
+ }
118
+ catch (err) {
119
+ return {
120
+ success: false,
121
+ error: `playwright chromium install failed: ${err instanceof Error ? err.message : String(err)}`,
122
+ };
123
+ }
111
124
  const config = await requireConfig();
112
125
  // Step 1: Get the compiled program
113
126
  let resolvedProgram;
@@ -751,6 +764,9 @@ export function buildVideoClipMetadata(videoId, result, program, runId) {
751
764
  ...(t.keystrokeOffsetsMs && t.keystrokeOffsetsMs.length > 0
752
765
  ? { keystrokeOffsetsMs: t.keystrokeOffsetsMs }
753
766
  : {}),
767
+ ...(t.clickOffsetsMs && t.clickOffsetsMs.length > 0
768
+ ? { clickOffsetsMs: t.clickOffsetsMs }
769
+ : {}),
754
770
  }));
755
771
  clipsByKey.set(`${variantId}:${artifact.clipId}`, {
756
772
  variantId,
@@ -954,8 +970,8 @@ async function prepareDirectArtifactUpload(params) {
954
970
  clipName: artifact.clipName ?? null,
955
971
  stepDescription: artifact.stepDescription ?? null,
956
972
  stepIndex: typeof artifact.stepIndex === 'number' ? artifact.stepIndex : null,
957
- durationMs: typeof artifact.durationMs === 'number' ? artifact.durationMs : null,
958
- trimStartMs: typeof artifact.trimStartMs === 'number' ? artifact.trimStartMs : null,
973
+ durationMs: typeof artifact.durationMs === 'number' ? Math.round(artifact.durationMs) : null,
974
+ trimStartMs: typeof artifact.trimStartMs === 'number' ? Math.round(artifact.trimStartMs) : null,
959
975
  artifactPlan: program.artifactPlan,
960
976
  tabIconMimeType: artifact.tabIconData ? (artifact.tabIconMimeType ?? 'image/png') : null,
961
977
  tabIconSha256,
@@ -113,6 +113,14 @@ export function buildCursorOverlayScript(theme = 'minimal') {
113
113
  triggerPulse();
114
114
  };
115
115
 
116
+ // AUT-80 — Browser-side mousedown timestamp buffer. Real CDP-dispatched
117
+ // \`mousedown\` events fire here at the exact moment the click happens in
118
+ // the recorded video (the cursor pulse is purely decorative and fires
119
+ // slightly later). The runner reads this buffer after each click action
120
+ // and uses the timestamps for the mouse SFX track so audio = visual
121
+ // click, even across cursor animation latency and frame quantisation.
122
+ window.__akClickAt = [];
123
+
116
124
  // Keep DOM event listeners as fallback for real mouse events (headed mode)
117
125
  document.addEventListener('mousemove', function(e) {
118
126
  setCursorPosition(e.clientX, e.clientY);
@@ -122,6 +130,7 @@ export function buildCursorOverlayScript(theme = 'minimal') {
122
130
  setCursorPosition(e.clientX, e.clientY);
123
131
  cursor.classList.add('__ak_pressed');
124
132
  triggerPulse();
133
+ window.__akClickAt.push(Date.now());
125
134
  }, true);
126
135
  window.addEventListener('mouseup', function(e) {
127
136
  setCursorPosition(e.clientX, e.clientY);
@@ -130,6 +139,14 @@ export function buildCursorOverlayScript(theme = 'minimal') {
130
139
  window.addEventListener('click', function(e) {
131
140
  setCursorPosition(e.clientX, e.clientY);
132
141
  triggerPulse();
142
+ // Capture synthetic click() dispatches that bypass mousedown (e.g.
143
+ // dispatchEvent('click') from JS-dispatch opcode paths). Skip if a
144
+ // mousedown landed in the last 80 ms so we don't double-count a
145
+ // regular mouse-driven click.
146
+ var last = window.__akClickAt[window.__akClickAt.length - 1];
147
+ if (last == null || (Date.now() - last) > 80) {
148
+ window.__akClickAt.push(Date.now());
149
+ }
133
150
  }, true);
134
151
  }
135
152
 
@@ -700,6 +700,14 @@ export interface OpcodeTiming {
700
700
  * for non-TYPE opcodes and for typing paths that bypass humanType.
701
701
  */
702
702
  keystrokeOffsetsMs?: number[];
703
+ /**
704
+ * For CLICK / DOUBLE_CLICK / CHECK opcodes captured in clipCursor mode:
705
+ * timestamp (ms relative to the active clip start) at which Playwright
706
+ * dispatched each actual click — measured AFTER the cursor animation
707
+ * settled on the target. Drives mouse SFX in sync with the visible click.
708
+ * Empty/undefined for opcodes whose adapter doesn't surface the timestamp.
709
+ */
710
+ clickOffsetsMs?: number[];
703
711
  }
704
712
  export interface RunResult {
705
713
  programId: string;
@@ -736,6 +744,25 @@ export interface ClickOptions {
736
744
  };
737
745
  /** Mouse button. Default: 'left' */
738
746
  button?: 'left' | 'right' | 'middle';
747
+ /**
748
+ * Fired with `Date.now()` right before Playwright dispatches the actual
749
+ * click — i.e. AFTER the visible cursor animation has settled on the
750
+ * target. The runner converts the wall-clock to clip-relative offsets so
751
+ * the video compositor can fire mouse SFX in lock-step with the visible
752
+ * click (instead of when the cursor was still travelling).
753
+ */
754
+ onClick?: (timestampMs: number) => void;
755
+ }
756
+ export interface ClickByTargetOptions {
757
+ selector?: string;
758
+ target?: SemanticTarget;
759
+ selectorAlternates?: string[];
760
+ onClick?: (timestampMs: number) => void;
761
+ }
762
+ export interface MouseActionOptions {
763
+ /** Same semantics as `ClickOptions.onClick` — fires right before the
764
+ * actual click is dispatched (CHECK / DOUBLE_CLICK). */
765
+ onClick?: (timestampMs: number) => void;
739
766
  }
740
767
  export interface RecordingOptions {
741
768
  mediaMode: 'clip' | 'video';
@@ -815,11 +842,7 @@ export interface RuntimeAdapter {
815
842
  } | null>;
816
843
  close(): Promise<void>;
817
844
  /** Click an element by semantic target. Falls back to selector if target not found. */
818
- clickByTarget?(opts: {
819
- selector?: string;
820
- target?: SemanticTarget;
821
- selectorAlternates?: string[];
822
- }): Promise<void>;
845
+ clickByTarget?(opts: ClickByTargetOptions): Promise<void>;
823
846
  /** Type into an element by semantic target. */
824
847
  typeByTarget?(opts: {
825
848
  selector?: string;
@@ -849,8 +872,8 @@ export interface RuntimeAdapter {
849
872
  value?: string;
850
873
  index?: number;
851
874
  }): Promise<void>;
852
- check?(selector: string, checked: boolean): Promise<void>;
853
- doubleClick?(selector: string): Promise<void>;
875
+ check?(selector: string, checked: boolean, opts?: MouseActionOptions): Promise<void>;
876
+ doubleClick?(selector: string, opts?: MouseActionOptions): Promise<void>;
854
877
  /**
855
878
  * Drag the source element from point A to point B with an animated cursor
856
879
  * when a clip is recording. Destination is either another element
@@ -46,5 +46,13 @@ export interface OpcodeActionResult {
46
46
  * clip-relative offsets so the video compositor can fire per-keystroke SFX.
47
47
  */
48
48
  keystrokeTimestampsMs?: number[];
49
+ /**
50
+ * For CLICK / DOUBLE_CLICK / CHECK opcodes: absolute wall-clock timestamps
51
+ * captured INSIDE the adapter, just before Playwright dispatches the
52
+ * actual click — i.e. AFTER the cursor animation has settled on the
53
+ * target. Lets the compositor place the mouse SFX in sync with the visible
54
+ * click instead of when the cursor was still travelling.
55
+ */
56
+ clickTimestampsMs?: number[];
49
57
  }
50
58
  export declare function executeOpcodeCoreAction(opcode: ExecutionOpcode, adapter: RuntimeAdapter, context?: OpcodeActionContext): Promise<OpcodeActionResult>;
@@ -62,9 +62,13 @@ export async function executeOpcodeCoreAction(opcode, adapter, context = {}) {
62
62
  case 'DISMISS_OVERLAYS':
63
63
  await dismissAllOverlays(adapter);
64
64
  break;
65
- case 'CLICK':
65
+ case 'CLICK': {
66
+ const clickTimestampsMs = [];
67
+ const onClick = (timestampMs) => {
68
+ clickTimestampsMs.push(timestampMs);
69
+ };
66
70
  try {
67
- await adapter.click(opcode.selector, opcode.button ? { button: opcode.button } : undefined);
71
+ await adapter.click(opcode.selector, { ...(opcode.button ? { button: opcode.button } : {}), onClick });
68
72
  }
69
73
  catch (error) {
70
74
  if (!opcode.target || !adapter.clickByTarget)
@@ -73,9 +77,11 @@ export async function executeOpcodeCoreAction(opcode, adapter, context = {}) {
73
77
  selector: opcode.selector,
74
78
  target: opcode.target,
75
79
  selectorAlternates: opcode.selectorAlternates,
80
+ onClick,
76
81
  });
77
82
  }
78
- break;
83
+ return { success: true, clickTimestampsMs };
84
+ }
79
85
  case 'TYPE': {
80
86
  const rawText = (opcode.textByLocale && context.currentVariant?.locale
81
87
  ? opcode.textByLocale[context.currentVariant.locale] ?? opcode.text
@@ -174,16 +180,24 @@ export async function executeOpcodeCoreAction(opcode, adapter, context = {}) {
174
180
  index: opcode.optionIndex,
175
181
  });
176
182
  break;
177
- case 'CHECK':
183
+ case 'CHECK': {
178
184
  if (!adapter.check)
179
185
  return { success: false, error: 'adapter does not support CHECK' };
180
- await adapter.check(opcode.selector, opcode.checked);
181
- break;
182
- case 'DOUBLE_CLICK':
186
+ const clickTimestampsMs = [];
187
+ await adapter.check(opcode.selector, opcode.checked, {
188
+ onClick: (timestampMs) => clickTimestampsMs.push(timestampMs),
189
+ });
190
+ return { success: true, clickTimestampsMs };
191
+ }
192
+ case 'DOUBLE_CLICK': {
183
193
  if (!adapter.doubleClick)
184
194
  return { success: false, error: 'adapter does not support DOUBLE_CLICK' };
185
- await adapter.doubleClick(opcode.selector);
186
- break;
195
+ const clickTimestampsMs = [];
196
+ await adapter.doubleClick(opcode.selector, {
197
+ onClick: (timestampMs) => clickTimestampsMs.push(timestampMs),
198
+ });
199
+ return { success: true, clickTimestampsMs };
200
+ }
187
201
  case 'DRAG':
188
202
  if (!adapter.drag)
189
203
  return { success: false, error: 'adapter does not support DRAG' };
@@ -290,6 +290,9 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
290
290
  const keystrokeOffsetsMs = result.keystrokeTimestampsMs && result.keystrokeTimestampsMs.length > 0
291
291
  ? result.keystrokeTimestampsMs.map((t) => Math.max(0, t - preTiming.clipStartedAt))
292
292
  : undefined;
293
+ const clickOffsetsMs = result.clickTimestampsMs && result.clickTimestampsMs.length > 0
294
+ ? result.clickTimestampsMs.map((t) => Math.max(0, t - preTiming.clipStartedAt))
295
+ : undefined;
293
296
  opcodeTimings.push({
294
297
  stepIndex: index,
295
298
  stepId: opcode.stepId,
@@ -300,6 +303,7 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
300
303
  timecodeEndMs: Math.max(0, Date.now() - preTiming.clipStartedAt),
301
304
  bbox: preTiming.bbox,
302
305
  ...(keystrokeOffsetsMs ? { keystrokeOffsetsMs } : {}),
306
+ ...(clickOffsetsMs ? { clickOffsetsMs } : {}),
303
307
  });
304
308
  }
305
309
  if (!result.success) {
@@ -666,6 +670,12 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
666
670
  }
667
671
  case 'END_CLIP': {
668
672
  const clipIdentity = resolveClipIdentity(executionState.activeClip, opcode);
673
+ // Capture the URL BEFORE endRecording(): the local CDP branch of the
674
+ // adapter closes the browser context inside endRecording() to release
675
+ // the CDP session, which makes any subsequent browser operation throw
676
+ // "Browser not launched". The clip ends immediately before this call,
677
+ // so the URL is still accurate.
678
+ const captureUrl = await adapter.getCurrentUrl();
669
679
  const recording = await adapter.endRecording();
670
680
  executionState.activeClip = undefined;
671
681
  // Match the artifact's mediaMode to the program's so the upload route
@@ -680,7 +690,7 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
680
690
  trimStartMs: recording.trimStartMs,
681
691
  dimensions: undefined,
682
692
  captureType: 'fullpage',
683
- captureUrl: await adapter.getCurrentUrl(),
693
+ captureUrl,
684
694
  clipId: clipIdentity.clipId,
685
695
  clipName: clipIdentity.clipName,
686
696
  stepDescription: opcode.description,
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Preflight Playwright Chromium binary install.
3
+ *
4
+ * Run before any `chromium.launch(...)` to make sure the Playwright Chromium
5
+ * binary exists on disk. The package ships a `postinstall` hook that fetches
6
+ * Chromium at install time, but that step can be silently skipped on some
7
+ * setups (CI caches, monorepos, npm scripts) — and any Playwright version
8
+ * bump downstream invalidates the existing binary too. Catching this once at
9
+ * run start avoids the famously confusing "Executable doesn't exist at …"
10
+ * launch failure mid-capture.
11
+ *
12
+ * Cross-platform: uses `npx playwright install chromium` (works on macOS,
13
+ * Linux, Windows). Output is streamed via inherited stdio so the user sees the
14
+ * download progress.
15
+ */
16
+ export declare function ensureChromiumInstalled(): Promise<void>;
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Preflight Playwright Chromium binary install.
3
+ *
4
+ * Run before any `chromium.launch(...)` to make sure the Playwright Chromium
5
+ * binary exists on disk. The package ships a `postinstall` hook that fetches
6
+ * Chromium at install time, but that step can be silently skipped on some
7
+ * setups (CI caches, monorepos, npm scripts) — and any Playwright version
8
+ * bump downstream invalidates the existing binary too. Catching this once at
9
+ * run start avoids the famously confusing "Executable doesn't exist at …"
10
+ * launch failure mid-capture.
11
+ *
12
+ * Cross-platform: uses `npx playwright install chromium` (works on macOS,
13
+ * Linux, Windows). Output is streamed via inherited stdio so the user sees the
14
+ * download progress.
15
+ */
16
+ import { spawn } from 'node:child_process';
17
+ import { existsSync } from 'node:fs';
18
+ import { chromium } from 'playwright';
19
+ import { logger } from './logger.js';
20
+ let cachedCheckResult = null;
21
/**
 * Makes sure the Playwright Chromium executable is present on disk,
 * installing it when it is not.
 *
 * A successful check is memoised in `cachedCheckResult`, so later calls in
 * the same process return immediately without touching the filesystem.
 *
 * @returns {Promise<void>} Resolves once the binary is known to exist.
 * @throws {Error} When the install command rejects, or when the binary is
 *   still absent after the install reported success.
 */
export async function ensureChromiumInstalled() {
    if (cachedCheckResult === 'ok') {
        return;
    }
    // `executablePath()` may throw when Playwright cannot resolve any path;
    // that case is treated exactly like "binary not installed".
    const chromiumBinaryPresent = () => {
        try {
            const binaryPath = chromium.executablePath();
            return Boolean(binaryPath) && existsSync(binaryPath);
        }
        catch {
            return false;
        }
    };
    if (chromiumBinaryPresent()) {
        cachedCheckResult = 'ok';
        return;
    }
    logger.info('[playwright] Chromium browser is missing — installing it now (one-time, takes ~30s)…');
    await runPlaywrightInstall();
    // Probe again: a zero exit code alone does not prove the executable
    // landed where Playwright expects it.
    if (!chromiumBinaryPresent()) {
        throw new Error('Playwright Chromium install completed but the binary is still missing. ' +
            'Run `npx playwright install chromium` manually to diagnose.');
    }
    cachedCheckResult = 'ok';
    logger.info('[playwright] Chromium installed.');
}
59
/**
 * Runs `npx --yes playwright install chromium` as a child process and settles
 * when that process finishes. The child inherits this process's stdio, so the
 * download progress is printed straight to the user's terminal.
 *
 * @returns {Promise<void>} Resolves when the command exits with code 0.
 *   Rejects with an `Error` when the process cannot be spawned, exits with a
 *   non-zero code, or is terminated by a signal.
 */
async function runPlaywrightInstall() {
    return new Promise((resolve, reject) => {
        // Use `npx` so we hit whichever Playwright version this CLI depends on,
        // regardless of whether the user has a global `playwright` binary.
        //
        // On Windows `npx` is a `.cmd` shim, which `spawn` cannot start
        // without a shell (ENOENT / EINVAL). The command line is a fixed
        // constant, so routing it through the shell there is safe; it is
        // passed as a single string because combining an args array with
        // `shell: true` is deprecated in recent Node.js releases.
        const child = process.platform === 'win32'
            ? spawn('npx --yes playwright install chromium', { stdio: 'inherit', shell: true })
            : spawn('npx', ['--yes', 'playwright', 'install', 'chromium'], { stdio: 'inherit' });
        child.on('error', (err) => {
            reject(new Error(`Failed to spawn \`npx playwright install chromium\`: ${err.message}. ` +
                'Make sure Node.js + npm are installed and on PATH.', { cause: err }));
        });
        child.on('close', (code, signal) => {
            if (code === 0) {
                resolve();
                return;
            }
            // `code` is null when the child was killed by a signal; report the
            // signal instead of the misleading "exited with code null".
            const outcome = code === null
                ? `was terminated by signal ${signal}`
                : `exited with code ${code}`;
            reject(new Error(`\`npx playwright install chromium\` ${outcome}. ` +
                'Run it manually for full diagnostics.'));
        });
    });
}
80
+ //# sourceMappingURL=playwright-installer.js.map
@@ -34,7 +34,9 @@ export declare class WebPlaywrightLocal implements RuntimeAdapter {
34
34
  * Click an element using semantic target resolution.
35
35
  * Tries CSS selector first, falls back to Playwright semantic locators.
36
36
  */
37
- clickByTarget(opts: ResolveOptions): Promise<void>;
37
+ clickByTarget(opts: ResolveOptions & {
38
+ onClick?: (timestampMs: number) => void;
39
+ }): Promise<void>;
38
40
  /**
39
41
  * Type into an element using semantic target resolution.
40
42
  */
@@ -87,8 +89,12 @@ export declare class WebPlaywrightLocal implements RuntimeAdapter {
87
89
  value?: string;
88
90
  index?: number;
89
91
  }): Promise<void>;
90
- check(selector: string, checked: boolean): Promise<void>;
91
- doubleClick(selector: string): Promise<void>;
92
+ check(selector: string, checked: boolean, actionOpts?: {
93
+ onClick?: (timestampMs: number) => void;
94
+ }): Promise<void>;
95
+ doubleClick(selector: string, actionOpts?: {
96
+ onClick?: (timestampMs: number) => void;
97
+ }): Promise<void>;
92
98
  drag(opts: {
93
99
  selector?: string;
94
100
  target?: SemanticTarget;
@@ -159,4 +165,18 @@ export declare class WebPlaywrightLocal implements RuntimeAdapter {
159
165
  private relativeClickPosition;
160
166
  private moveClipCursorToPoint;
161
167
  private emitClipClickPulse;
168
+ /**
169
+ * Drain the browser-side `__akClickAt` buffer for timestamps newer than
170
+ * `sinceMs`, replay them through `onClick`, and reset the buffer so the
171
+ * next click action starts fresh. This is what makes mouse SFX line up
172
+ * exactly with the visual click in the recorded video — the mousedown
173
+ * listener inside the cursor overlay timestamps each click at the same
174
+ * instant the browser dispatches it, which is also the frame the CDP
175
+ * screencast captures.
176
+ *
177
+ * Falls back to `sinceMs` (Node wall-clock at action dispatch) when the
178
+ * buffer is empty (e.g. `useKeyboard` Enter-press path, or transient
179
+ * page.evaluate failure).
180
+ */
181
+ private reportClickSfxTimestamps;
162
182
  }
@@ -80,7 +80,9 @@ export class WebPlaywrightLocal {
80
80
  try {
81
81
  if (options?.coordinates) {
82
82
  await this.moveClipCursorToPoint(options.coordinates);
83
+ const dispatchedAt = Date.now();
83
84
  await this.browser.clickByCoordinates(options.coordinates.x, options.coordinates.y);
85
+ await this.reportClickSfxTimestamps(page, dispatchedAt, options?.onClick);
84
86
  logger.debug(`[click] done coords took ${Date.now() - t0}ms`);
85
87
  return;
86
88
  }
@@ -88,12 +90,17 @@ export class WebPlaywrightLocal {
88
90
  const animatedTarget = await this.moveClipCursorToLocator(locator);
89
91
  if (options?.useKeyboard) {
90
92
  await locator.focus();
93
+ const dispatchedAt = Date.now();
91
94
  await page.keyboard.press('Enter');
95
+ // No real mousedown fires on Enter — fall back to Node timing.
96
+ options?.onClick?.(dispatchedAt);
92
97
  logger.debug(`[click] done keyboard took ${Date.now() - t0}ms`);
93
98
  return;
94
99
  }
95
100
  if (options?.useJsDispatch) {
101
+ const dispatchedAt = Date.now();
96
102
  await locator.dispatchEvent('click');
103
+ await this.reportClickSfxTimestamps(page, dispatchedAt, options?.onClick);
97
104
  logger.debug(`[click] done js_dispatch took ${Date.now() - t0}ms`);
98
105
  return;
99
106
  }
@@ -103,15 +110,18 @@ export class WebPlaywrightLocal {
103
110
  ? await this.relativeClickPosition(locator, animatedTarget)
104
111
  : null;
105
112
  if (options?.button && options.button !== 'left') {
113
+ const dispatchedAt = Date.now();
106
114
  await locator.click({
107
115
  button: options.button,
108
116
  timeout: 5000,
109
117
  force: options?.force,
110
118
  ...(clickPosition ? { position: clickPosition } : {}),
111
119
  });
120
+ await this.reportClickSfxTimestamps(page, dispatchedAt, options?.onClick);
112
121
  logger.debug(`[click] done button=${options.button} took ${Date.now() - t0}ms`);
113
122
  return;
114
123
  }
124
+ const dispatchedAt = Date.now();
115
125
  if (clickPosition) {
116
126
  await locator.click({
117
127
  timeout: 5000,
@@ -122,6 +132,7 @@ export class WebPlaywrightLocal {
122
132
  else {
123
133
  await this.browser.clickBySelector(selector, { force: options?.force });
124
134
  }
135
+ await this.reportClickSfxTimestamps(page, dispatchedAt, options?.onClick);
125
136
  await this.emitClipClickPulse();
126
137
  logger.debug(`[click] done normal took ${Date.now() - t0}ms`);
127
138
  }
@@ -144,10 +155,12 @@ export class WebPlaywrightLocal {
144
155
  const position = target
145
156
  ? await this.relativeClickPosition(resolved.locator, target)
146
157
  : null;
158
+ const dispatchedAt = Date.now();
147
159
  await resolved.locator.click({
148
160
  timeout: 5000,
149
161
  ...(position ? { position } : {}),
150
162
  });
163
+ await this.reportClickSfxTimestamps(page, dispatchedAt, opts.onClick);
151
164
  }
152
165
  /**
153
166
  * Type into an element using semantic target resolution.
@@ -633,7 +646,7 @@ export class WebPlaywrightLocal {
633
646
  optionIndex: option.index,
634
647
  });
635
648
  }
636
- async check(selector, checked) {
649
+ async check(selector, checked, actionOpts) {
637
650
  const page = await this.browser.currentPage;
638
651
  const locator = page.locator(selector).first();
639
652
  const target = await this.moveClipCursorToLocator(locator);
@@ -641,24 +654,28 @@ export class WebPlaywrightLocal {
641
654
  ? await this.relativeClickPosition(locator, target)
642
655
  : null;
643
656
  const opts = { timeout: 5000, ...(position ? { position } : {}) };
657
+ const dispatchedAt = Date.now();
644
658
  if (checked) {
645
659
  await locator.check(opts);
646
660
  }
647
661
  else {
648
662
  await locator.uncheck(opts);
649
663
  }
664
+ await this.reportClickSfxTimestamps(page, dispatchedAt, actionOpts?.onClick);
650
665
  }
651
- async doubleClick(selector) {
666
+ async doubleClick(selector, actionOpts) {
652
667
  const page = await this.browser.currentPage;
653
668
  const locator = page.locator(selector).first();
654
669
  const target = await this.moveClipCursorToLocator(locator);
655
670
  const position = target
656
671
  ? await this.relativeClickPosition(locator, target)
657
672
  : null;
673
+ const dispatchedAt = Date.now();
658
674
  await locator.dblclick({
659
675
  timeout: 5000,
660
676
  ...(position ? { position } : {}),
661
677
  });
678
+ await this.reportClickSfxTimestamps(page, dispatchedAt, actionOpts?.onClick);
662
679
  }
663
680
  async drag(opts) {
664
681
  const page = await this.browser.currentPage;
@@ -895,7 +912,10 @@ export class WebPlaywrightLocal {
895
912
  }
896
913
  await page.waitForTimeout(70);
897
914
  await humanType(page, text, this.clipCursor
898
- ? { minDelayMs: 20, maxDelayMs: 45, onKeystroke }
915
+ // Demo-video cadence: ~80-180ms between keys (≈ 80-100 WPM with
916
+ // natural variation). Faster than that reads as robotic in the
917
+ // mixed video + keyboard SFX track.
918
+ ? { minDelayMs: 80, maxDelayMs: 180, onKeystroke }
899
919
  : { onKeystroke });
900
920
  }
901
921
  async seedClipCursor(position) {
@@ -1008,6 +1028,47 @@ export class WebPlaywrightLocal {
1008
1028
  window.__akClickPulse(px, py);
1009
1029
  }, { px: Math.round(x), py: Math.round(y) }).catch(() => { });
1010
1030
  }
1031
+ /**
1032
+ * Drain the browser-side `__akClickAt` buffer for timestamps newer than
1033
+ * `sinceMs`, replay them through `onClick`, and reset the buffer so the
1034
+ * next click action starts fresh. This is what makes mouse SFX line up
1035
+ * exactly with the visual click in the recorded video — the mousedown
1036
+ * listener inside the cursor overlay timestamps each click at the same
1037
+ * instant the browser dispatches it, which is also the frame the CDP
1038
+ * screencast captures.
1039
+ *
1040
+ * Falls back to `sinceMs` (Node wall-clock at action dispatch) when the
1041
+ * buffer is empty (e.g. `useKeyboard` Enter-press path, or transient
1042
+ * page.evaluate failure).
1043
+ */
1044
+ async reportClickSfxTimestamps(page, sinceMs, onClick) {
1045
+ if (!onClick)
1046
+ return;
1047
+ let captured = [];
1048
+ try {
1049
+ captured = (await page.evaluate((cutoff) => {
1050
+ const buf = window.__akClickAt;
1051
+ if (!Array.isArray(buf))
1052
+ return [];
1053
+ const fresh = buf.filter((t) => typeof t === 'number' && t >= cutoff);
1054
+ // Reset the buffer so successive click actions don't see each
1055
+ // others' timestamps. We mutate the array in place to keep the
1056
+ // reference stable for any other consumers (none today).
1057
+ buf.length = 0;
1058
+ return fresh;
1059
+ }, sinceMs));
1060
+ }
1061
+ catch {
1062
+ captured = [];
1063
+ }
1064
+ if (captured.length > 0) {
1065
+ for (const t of captured)
1066
+ onClick(t);
1067
+ }
1068
+ else {
1069
+ onClick(sinceMs);
1070
+ }
1071
+ }
1011
1072
  }
1012
1073
  function describeResolveOptions(opts) {
1013
1074
  const parts = [];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autokap",
3
- "version": "1.5.2",
3
+ "version": "1.5.4",
4
4
  "description": "AI-powered CLI tool for capturing clean screenshots of websites",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",