autokap 1.5.3 → 1.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -382,16 +382,23 @@ Capture a screenshot of the viewport or a specific element.
382
382
  | `captureId` | string | no | Stable identifier for Studio/dev links. Should match preset page/element id |
383
383
  | `captureName` | string | no | Human-readable label shown in Studio |
384
384
  | `elementSelector` | string | no | CSS selector for element-level capture (crops to element bounds) |
385
+ | `outscale` | OutscaleConfig | no | Padding around the captured element (only applied with `elementSelector`). Typically set by the user post-generation. Omit by default. |
385
386
 
386
387
  **Can:** Full-page viewport capture, element-level capture (cropped), LLM verification (detects blank/error/loading/overlay states), alt text generation, favicon extraction.
387
388
  **Cannot:** Capture content below the fold in a single shot (use SCROLL first). Capture cross-origin iframe content.
388
389
 
389
390
  **Tip:** Without `elementSelector`, captures the full viewport. With `elementSelector`, captures only that element's bounding box — useful for component-level screenshots.
390
391
 
392
+ **`outscale` shape:** all fields optional. `padding` is uniform (pixels). `paddingTop/Right/Bottom/Left` override per side. `paddingPercent` (0–100) scales with the element. `clampToViewport` (default `true`) prevents the crop from exceeding the document. `backgroundColor` fills any uncovered area.
393
+
391
394
  ```json
392
395
  { "kind": "CAPTURE_SCREENSHOT", "captureId": "dashboard-main", "captureName": "Dashboard", "elementSelector": "[data-ak=\"main-content\"]", "postcondition": { "type": "always" } }
393
396
  ```
394
397
 
398
+ ```json
399
+ { "kind": "CAPTURE_SCREENSHOT", "captureName": "Pricing card", "elementSelector": "[data-ak=\"pricing\"]", "outscale": { "padding": 24 }, "postcondition": { "type": "always" } }
400
+ ```
401
+
395
402
  ## BEGIN_CLIP
396
403
 
397
404
  Start recording a clip. All interactions between BEGIN_CLIP and END_CLIP are recorded.
@@ -163,7 +163,7 @@ interface VariantSpec {
163
163
  | `SET_THEME` | no | `theme`, `method`, `storageHints?` | `always` | Use `"$variant"`. Prefer `method: "storage"` |
164
164
  | `ASSERT_ROUTE` | no | `urlPattern` | `route_matches` | Validation checkpoint |
165
165
  | `ASSERT_SURFACE` | no | `selectors[]`, `matchAll` | `always` | Validation checkpoint |
166
- | `CAPTURE_SCREENSHOT` | no | `captureId`, `captureName`, `elementSelector?` | `always` | `elementSelector` for element-level crop |
166
+ | `CAPTURE_SCREENSHOT` | no | `captureId`, `captureName`, `elementSelector?`, `outscale?` | `always` | `elementSelector` for element-level crop. `outscale` adds padding around the element (user-edited post-generation; omit by default) |
167
167
  | `BEGIN_CLIP` | no | `clipId`, `clipName` | `always` | Start recording |
168
168
  | `END_CLIP` | no | `clipId`, `clipName` | `always` | Stop recording. Same `clipId` as BEGIN_CLIP |
169
169
  | `CLONE_ELEMENT` | yes | `sourceSelector`, `containerSelector`, `count` | `always` | **Non-blocking.** Duplicate a template element N times |
@@ -113,6 +113,14 @@ export function buildCursorOverlayScript(theme = 'minimal') {
113
113
  triggerPulse();
114
114
  };
115
115
 
116
+ // AUT-80 — Browser-side mousedown timestamp buffer. Real CDP-dispatched
117
+ // \`mousedown\` events fire here at the exact moment the click happens in
118
+ // the recorded video (the cursor pulse is purely decorative and fires
119
+ // slightly later). The runner reads this buffer after each click action
120
+ // and uses the timestamps for the mouse SFX track so audio = visual
121
+ // click, even across cursor animation latency and frame quantisation.
122
+ window.__akClickAt = [];
123
+
116
124
  // Keep DOM event listeners as fallback for real mouse events (headed mode)
117
125
  document.addEventListener('mousemove', function(e) {
118
126
  setCursorPosition(e.clientX, e.clientY);
@@ -122,6 +130,7 @@ export function buildCursorOverlayScript(theme = 'minimal') {
122
130
  setCursorPosition(e.clientX, e.clientY);
123
131
  cursor.classList.add('__ak_pressed');
124
132
  triggerPulse();
133
+ window.__akClickAt.push(Date.now());
125
134
  }, true);
126
135
  window.addEventListener('mouseup', function(e) {
127
136
  setCursorPosition(e.clientX, e.clientY);
@@ -130,6 +139,14 @@ export function buildCursorOverlayScript(theme = 'minimal') {
130
139
  window.addEventListener('click', function(e) {
131
140
  setCursorPosition(e.clientX, e.clientY);
132
141
  triggerPulse();
142
+ // Capture synthetic click() dispatches that bypass mousedown (e.g.
143
+ // dispatchEvent('click') from JS-dispatch opcode paths). Skip if any
144
+ // timestamp was buffered in the last 80 ms (normally this click's own
145
+ // mousedown) so a regular mouse-driven click isn't double-counted.
146
+ var last = window.__akClickAt[window.__akClickAt.length - 1];
147
+ if (last == null || (Date.now() - last) > 80) {
148
+ window.__akClickAt.push(Date.now());
149
+ }
133
150
  }, true);
134
151
  }
135
152
 
@@ -29,6 +29,16 @@ export declare const RecoveryPolicySchema: z.ZodObject<{
29
29
  allowReload: z.ZodBoolean;
30
30
  allowHealer: z.ZodBoolean;
31
31
  }, z.core.$strict>;
32
+ export declare const OutscaleConfigSchema: z.ZodObject<{
33
+ padding: z.ZodOptional<z.ZodNumber>;
34
+ paddingTop: z.ZodOptional<z.ZodNumber>;
35
+ paddingRight: z.ZodOptional<z.ZodNumber>;
36
+ paddingBottom: z.ZodOptional<z.ZodNumber>;
37
+ paddingLeft: z.ZodOptional<z.ZodNumber>;
38
+ paddingPercent: z.ZodOptional<z.ZodNumber>;
39
+ clampToViewport: z.ZodOptional<z.ZodBoolean>;
40
+ backgroundColor: z.ZodOptional<z.ZodString>;
41
+ }, z.core.$strict>;
32
42
  export declare const ExecutionOpcodeSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
33
43
  url: z.ZodString;
34
44
  description: z.ZodString;
@@ -495,6 +505,16 @@ export declare const ExecutionOpcodeSchema: z.ZodDiscriminatedUnion<[z.ZodObject
495
505
  captureId: z.ZodOptional<z.ZodString>;
496
506
  captureName: z.ZodOptional<z.ZodString>;
497
507
  elementSelector: z.ZodOptional<z.ZodString>;
508
+ outscale: z.ZodOptional<z.ZodObject<{
509
+ padding: z.ZodOptional<z.ZodNumber>;
510
+ paddingTop: z.ZodOptional<z.ZodNumber>;
511
+ paddingRight: z.ZodOptional<z.ZodNumber>;
512
+ paddingBottom: z.ZodOptional<z.ZodNumber>;
513
+ paddingLeft: z.ZodOptional<z.ZodNumber>;
514
+ paddingPercent: z.ZodOptional<z.ZodNumber>;
515
+ clampToViewport: z.ZodOptional<z.ZodBoolean>;
516
+ backgroundColor: z.ZodOptional<z.ZodString>;
517
+ }, z.core.$strict>>;
498
518
  description: z.ZodString;
499
519
  postcondition: z.ZodObject<{
500
520
  type: z.ZodEnum<{
@@ -1688,6 +1708,16 @@ export declare const ExecutionProgramSchema: z.ZodObject<{
1688
1708
  captureId: z.ZodOptional<z.ZodString>;
1689
1709
  captureName: z.ZodOptional<z.ZodString>;
1690
1710
  elementSelector: z.ZodOptional<z.ZodString>;
1711
+ outscale: z.ZodOptional<z.ZodObject<{
1712
+ padding: z.ZodOptional<z.ZodNumber>;
1713
+ paddingTop: z.ZodOptional<z.ZodNumber>;
1714
+ paddingRight: z.ZodOptional<z.ZodNumber>;
1715
+ paddingBottom: z.ZodOptional<z.ZodNumber>;
1716
+ paddingLeft: z.ZodOptional<z.ZodNumber>;
1717
+ paddingPercent: z.ZodOptional<z.ZodNumber>;
1718
+ clampToViewport: z.ZodOptional<z.ZodBoolean>;
1719
+ backgroundColor: z.ZodOptional<z.ZodString>;
1720
+ }, z.core.$strict>>;
1691
1721
  description: z.ZodString;
1692
1722
  postcondition: z.ZodObject<{
1693
1723
  type: z.ZodEnum<{
@@ -2665,6 +2695,16 @@ export declare const HealerPatchSchema: z.ZodObject<{
2665
2695
  captureId: z.ZodOptional<z.ZodString>;
2666
2696
  captureName: z.ZodOptional<z.ZodString>;
2667
2697
  elementSelector: z.ZodOptional<z.ZodString>;
2698
+ outscale: z.ZodOptional<z.ZodObject<{
2699
+ padding: z.ZodOptional<z.ZodNumber>;
2700
+ paddingTop: z.ZodOptional<z.ZodNumber>;
2701
+ paddingRight: z.ZodOptional<z.ZodNumber>;
2702
+ paddingBottom: z.ZodOptional<z.ZodNumber>;
2703
+ paddingLeft: z.ZodOptional<z.ZodNumber>;
2704
+ paddingPercent: z.ZodOptional<z.ZodNumber>;
2705
+ clampToViewport: z.ZodOptional<z.ZodBoolean>;
2706
+ backgroundColor: z.ZodOptional<z.ZodString>;
2707
+ }, z.core.$strict>>;
2668
2708
  description: z.ZodString;
2669
2709
  postcondition: z.ZodObject<{
2670
2710
  type: z.ZodEnum<{
@@ -3588,6 +3628,16 @@ export declare const HealerPatchSchema: z.ZodObject<{
3588
3628
  captureId: z.ZodOptional<z.ZodString>;
3589
3629
  captureName: z.ZodOptional<z.ZodString>;
3590
3630
  elementSelector: z.ZodOptional<z.ZodString>;
3631
+ outscale: z.ZodOptional<z.ZodObject<{
3632
+ padding: z.ZodOptional<z.ZodNumber>;
3633
+ paddingTop: z.ZodOptional<z.ZodNumber>;
3634
+ paddingRight: z.ZodOptional<z.ZodNumber>;
3635
+ paddingBottom: z.ZodOptional<z.ZodNumber>;
3636
+ paddingLeft: z.ZodOptional<z.ZodNumber>;
3637
+ paddingPercent: z.ZodOptional<z.ZodNumber>;
3638
+ clampToViewport: z.ZodOptional<z.ZodBoolean>;
3639
+ backgroundColor: z.ZodOptional<z.ZodString>;
3640
+ }, z.core.$strict>>;
3591
3641
  description: z.ZodString;
3592
3642
  postcondition: z.ZodObject<{
3593
3643
  type: z.ZodEnum<{
@@ -4610,6 +4660,16 @@ export declare function safeParseProgramResult(data: unknown): z.ZodSafeParseRes
4610
4660
  captureId?: string | undefined;
4611
4661
  captureName?: string | undefined;
4612
4662
  elementSelector?: string | undefined;
4663
+ outscale?: {
4664
+ padding?: number | undefined;
4665
+ paddingTop?: number | undefined;
4666
+ paddingRight?: number | undefined;
4667
+ paddingBottom?: number | undefined;
4668
+ paddingLeft?: number | undefined;
4669
+ paddingPercent?: number | undefined;
4670
+ clampToViewport?: boolean | undefined;
4671
+ backgroundColor?: string | undefined;
4672
+ } | undefined;
4613
4673
  stepId?: string | undefined;
4614
4674
  } | {
4615
4675
  description: string;
@@ -242,12 +242,23 @@ const ScrollOpcodeSchema = z.object({
242
242
  targetSelector: z.string().optional(),
243
243
  target: SemanticTargetSchema.optional(),
244
244
  }).strict();
245
+ export const OutscaleConfigSchema = z.object({
246
+ padding: z.number().min(0).optional(),
247
+ paddingTop: z.number().min(0).optional(),
248
+ paddingRight: z.number().min(0).optional(),
249
+ paddingBottom: z.number().min(0).optional(),
250
+ paddingLeft: z.number().min(0).optional(),
251
+ paddingPercent: z.number().min(0).max(100).optional(),
252
+ clampToViewport: z.boolean().optional(),
253
+ backgroundColor: z.string().optional(),
254
+ }).strict();
245
255
  const CaptureScreenshotOpcodeSchema = z.object({
246
256
  kind: z.literal('CAPTURE_SCREENSHOT'),
247
257
  ...opcodeBase,
248
258
  captureId: z.string().optional(),
249
259
  captureName: z.string().optional(),
250
260
  elementSelector: z.string().optional(),
261
+ outscale: OutscaleConfigSchema.optional(),
251
262
  }).strict();
252
263
  const BeginClipOpcodeSchema = z.object({
253
264
  kind: z.literal('BEGIN_CLIP'),
@@ -4,7 +4,7 @@
4
4
  * All types for the compiled execution model:
5
5
  * preset (natural language) -> ExecutionProgram (typed IR) -> deterministic runtime
6
6
  */
7
- import type { AKTree, BrowserStorageState, BrowserSessionStorageState, VideoCursorTheme, VideoPageSignals } from './types.js';
7
+ import type { AKTree, BrowserStorageState, BrowserSessionStorageState, OutscaleConfig, VideoCursorTheme, VideoPageSignals } from './types.js';
8
8
  import type { MockupOptions } from './mockup.js';
9
9
  /** Sentinel value that resolves to the current variant's locale or theme at runtime */
10
10
  export declare const VARIANT_PLACEHOLDER: "$variant";
@@ -247,6 +247,8 @@ export interface CaptureScreenshotOpcode extends OpcodeBase {
247
247
  captureName?: string;
248
248
  /** Optional element selector for element-level capture */
249
249
  elementSelector?: string;
250
+ /** Optional padding around the captured element. Only applied when `elementSelector` is set. */
251
+ outscale?: OutscaleConfig;
250
252
  }
251
253
  export interface BeginClipOpcode extends OpcodeBase {
252
254
  kind: 'BEGIN_CLIP';
@@ -809,7 +811,7 @@ export interface RuntimeAdapter {
809
811
  method: string | null;
810
812
  }>;
811
813
  takeScreenshot(): Promise<Buffer>;
812
- takeElementScreenshot?(selector: string): Promise<Buffer>;
814
+ takeElementScreenshot?(selector: string, outscale?: OutscaleConfig): Promise<Buffer>;
813
815
  takeCleanScreenshot(): Promise<Buffer>;
814
816
  beginRecording(options: RecordingOptions): Promise<void>;
815
817
  endRecording(): Promise<RecordingResult>;
@@ -545,7 +545,7 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
545
545
  const captureUrl = await adapter.getCurrentUrl();
546
546
  const takeBuffer = async () => {
547
547
  if (opcode.elementSelector && adapter.takeElementScreenshot) {
548
- return adapter.takeElementScreenshot(opcode.elementSelector);
548
+ return adapter.takeElementScreenshot(opcode.elementSelector, opcode.outscale);
549
549
  }
550
550
  if (opcode.elementSelector) {
551
551
  throw new Error(`element capture requires adapter support for selector "${opcode.elementSelector}"`);
@@ -608,6 +608,16 @@ export declare const SignedExecutionProgramEnvelopeSchema: z.ZodObject<{
608
608
  captureId: z.ZodOptional<z.ZodString>;
609
609
  captureName: z.ZodOptional<z.ZodString>;
610
610
  elementSelector: z.ZodOptional<z.ZodString>;
611
+ outscale: z.ZodOptional<z.ZodObject<{
612
+ padding: z.ZodOptional<z.ZodNumber>;
613
+ paddingTop: z.ZodOptional<z.ZodNumber>;
614
+ paddingRight: z.ZodOptional<z.ZodNumber>;
615
+ paddingBottom: z.ZodOptional<z.ZodNumber>;
616
+ paddingLeft: z.ZodOptional<z.ZodNumber>;
617
+ paddingPercent: z.ZodOptional<z.ZodNumber>;
618
+ clampToViewport: z.ZodOptional<z.ZodBoolean>;
619
+ backgroundColor: z.ZodOptional<z.ZodString>;
620
+ }, z.core.$strict>>;
611
621
  description: z.ZodString;
612
622
  postcondition: z.ZodObject<{
613
623
  type: z.ZodEnum<{
@@ -634,6 +634,16 @@ export declare const VideoIngestPayloadSchema: z.ZodObject<{
634
634
  captureId: z.ZodOptional<z.ZodString>;
635
635
  captureName: z.ZodOptional<z.ZodString>;
636
636
  elementSelector: z.ZodOptional<z.ZodString>;
637
+ outscale: z.ZodOptional<z.ZodObject<{
638
+ padding: z.ZodOptional<z.ZodNumber>;
639
+ paddingTop: z.ZodOptional<z.ZodNumber>;
640
+ paddingRight: z.ZodOptional<z.ZodNumber>;
641
+ paddingBottom: z.ZodOptional<z.ZodNumber>;
642
+ paddingLeft: z.ZodOptional<z.ZodNumber>;
643
+ paddingPercent: z.ZodOptional<z.ZodNumber>;
644
+ clampToViewport: z.ZodOptional<z.ZodBoolean>;
645
+ backgroundColor: z.ZodOptional<z.ZodString>;
646
+ }, z.core.$strict>>;
637
647
  description: z.ZodString;
638
648
  postcondition: z.ZodObject<{
639
649
  type: z.ZodEnum<{
@@ -5,7 +5,7 @@
5
5
  * This is the first (and for now only) RuntimeAdapter implementation.
6
6
  */
7
7
  import type { Browser } from './browser.js';
8
- import type { AKTree, VideoPageSignals } from './types.js';
8
+ import type { AKTree, OutscaleConfig, VideoPageSignals } from './types.js';
9
9
  import type { RuntimeAdapter, ClickOptions, WaitCondition, RecordingOptions, RecordingResult, SemanticTarget } from './execution-types.js';
10
10
  import { type ResolveOptions } from './semantic-resolver.js';
11
11
  export declare class WebPlaywrightLocal implements RuntimeAdapter {
@@ -63,7 +63,7 @@ export declare class WebPlaywrightLocal implements RuntimeAdapter {
63
63
  method: string | null;
64
64
  }>;
65
65
  takeScreenshot(): Promise<Buffer>;
66
- takeElementScreenshot(selector: string): Promise<Buffer>;
66
+ takeElementScreenshot(selector: string, outscale?: OutscaleConfig): Promise<Buffer>;
67
67
  takeCleanScreenshot(): Promise<Buffer>;
68
68
  beginRecording(options: RecordingOptions): Promise<void>;
69
69
  getElementBoundingBox(selector: string): Promise<{
@@ -165,4 +165,18 @@ export declare class WebPlaywrightLocal implements RuntimeAdapter {
165
165
  private relativeClickPosition;
166
166
  private moveClipCursorToPoint;
167
167
  private emitClipClickPulse;
168
+ /**
169
+ * Drain the browser-side `__akClickAt` buffer, replay every timestamp at
170
+ * or after `sinceMs` through `onClick`, and clear the buffer so the next
171
+ * click action starts fresh. This is what makes mouse SFX line up
172
+ * exactly with the visual click in the recorded video — the mousedown
173
+ * listener inside the cursor overlay timestamps each click at the same
174
+ * instant the browser dispatches it, which is also the frame the CDP
175
+ * screencast captures.
176
+ *
177
+ * Calls `onClick(sinceMs)` once (Node wall-clock at action dispatch) when
178
+ * no fresh timestamp is found: empty or missing buffer, or a failed
179
+ * page.evaluate. No-op (buffer left untouched) when `onClick` is unset.
180
+ */
181
+ private reportClickSfxTimestamps;
168
182
  }
@@ -77,12 +77,12 @@ export class WebPlaywrightLocal {
77
77
  const page = await this.browser.currentPage;
78
78
  const t0 = Date.now();
79
79
  logger.debug(`[click] start selector="${selector}"${options?.useKeyboard ? ' mode=keyboard' : ''}${options?.useJsDispatch ? ' mode=js_dispatch' : ''}${options?.coordinates ? ` mode=coords(${options.coordinates.x},${options.coordinates.y})` : ''}`);
80
- const fireClickSfx = () => options?.onClick?.(Date.now());
81
80
  try {
82
81
  if (options?.coordinates) {
83
82
  await this.moveClipCursorToPoint(options.coordinates);
84
- fireClickSfx();
83
+ const dispatchedAt = Date.now();
85
84
  await this.browser.clickByCoordinates(options.coordinates.x, options.coordinates.y);
85
+ await this.reportClickSfxTimestamps(page, dispatchedAt, options?.onClick);
86
86
  logger.debug(`[click] done coords took ${Date.now() - t0}ms`);
87
87
  return;
88
88
  }
@@ -90,14 +90,17 @@ export class WebPlaywrightLocal {
90
90
  const animatedTarget = await this.moveClipCursorToLocator(locator);
91
91
  if (options?.useKeyboard) {
92
92
  await locator.focus();
93
- fireClickSfx();
93
+ const dispatchedAt = Date.now();
94
94
  await page.keyboard.press('Enter');
95
+ // No real mousedown fires on Enter — fall back to Node timing.
96
+ options?.onClick?.(dispatchedAt);
95
97
  logger.debug(`[click] done keyboard took ${Date.now() - t0}ms`);
96
98
  return;
97
99
  }
98
100
  if (options?.useJsDispatch) {
99
- fireClickSfx();
101
+ const dispatchedAt = Date.now();
100
102
  await locator.dispatchEvent('click');
103
+ await this.reportClickSfxTimestamps(page, dispatchedAt, options?.onClick);
101
104
  logger.debug(`[click] done js_dispatch took ${Date.now() - t0}ms`);
102
105
  return;
103
106
  }
@@ -107,18 +110,19 @@ export class WebPlaywrightLocal {
107
110
  ? await this.relativeClickPosition(locator, animatedTarget)
108
111
  : null;
109
112
  if (options?.button && options.button !== 'left') {
110
- fireClickSfx();
113
+ const dispatchedAt = Date.now();
111
114
  await locator.click({
112
115
  button: options.button,
113
116
  timeout: 5000,
114
117
  force: options?.force,
115
118
  ...(clickPosition ? { position: clickPosition } : {}),
116
119
  });
120
+ await this.reportClickSfxTimestamps(page, dispatchedAt, options?.onClick);
117
121
  logger.debug(`[click] done button=${options.button} took ${Date.now() - t0}ms`);
118
122
  return;
119
123
  }
124
+ const dispatchedAt = Date.now();
120
125
  if (clickPosition) {
121
- fireClickSfx();
122
126
  await locator.click({
123
127
  timeout: 5000,
124
128
  force: options?.force,
@@ -126,9 +130,9 @@ export class WebPlaywrightLocal {
126
130
  });
127
131
  }
128
132
  else {
129
- fireClickSfx();
130
133
  await this.browser.clickBySelector(selector, { force: options?.force });
131
134
  }
135
+ await this.reportClickSfxTimestamps(page, dispatchedAt, options?.onClick);
132
136
  await this.emitClipClickPulse();
133
137
  logger.debug(`[click] done normal took ${Date.now() - t0}ms`);
134
138
  }
@@ -151,11 +155,12 @@ export class WebPlaywrightLocal {
151
155
  const position = target
152
156
  ? await this.relativeClickPosition(resolved.locator, target)
153
157
  : null;
154
- opts.onClick?.(Date.now());
158
+ const dispatchedAt = Date.now();
155
159
  await resolved.locator.click({
156
160
  timeout: 5000,
157
161
  ...(position ? { position } : {}),
158
162
  });
163
+ await this.reportClickSfxTimestamps(page, dispatchedAt, opts.onClick);
159
164
  }
160
165
  /**
161
166
  * Type into an element using semantic target resolution.
@@ -311,8 +316,8 @@ export class WebPlaywrightLocal {
311
316
  async takeScreenshot() {
312
317
  return this.browser.takeScreenshot();
313
318
  }
314
- async takeElementScreenshot(selector) {
315
- const { buffer } = await this.browser.screenshotBySelector(selector);
319
+ async takeElementScreenshot(selector, outscale) {
320
+ const { buffer } = await this.browser.screenshotBySelector(selector, outscale);
316
321
  return buffer;
317
322
  }
318
323
  async takeCleanScreenshot() {
@@ -649,13 +654,14 @@ export class WebPlaywrightLocal {
649
654
  ? await this.relativeClickPosition(locator, target)
650
655
  : null;
651
656
  const opts = { timeout: 5000, ...(position ? { position } : {}) };
652
- actionOpts?.onClick?.(Date.now());
657
+ const dispatchedAt = Date.now();
653
658
  if (checked) {
654
659
  await locator.check(opts);
655
660
  }
656
661
  else {
657
662
  await locator.uncheck(opts);
658
663
  }
664
+ await this.reportClickSfxTimestamps(page, dispatchedAt, actionOpts?.onClick);
659
665
  }
660
666
  async doubleClick(selector, actionOpts) {
661
667
  const page = await this.browser.currentPage;
@@ -664,11 +670,12 @@ export class WebPlaywrightLocal {
664
670
  const position = target
665
671
  ? await this.relativeClickPosition(locator, target)
666
672
  : null;
667
- actionOpts?.onClick?.(Date.now());
673
+ const dispatchedAt = Date.now();
668
674
  await locator.dblclick({
669
675
  timeout: 5000,
670
676
  ...(position ? { position } : {}),
671
677
  });
678
+ await this.reportClickSfxTimestamps(page, dispatchedAt, actionOpts?.onClick);
672
679
  }
673
680
  async drag(opts) {
674
681
  const page = await this.browser.currentPage;
@@ -1021,6 +1028,47 @@ export class WebPlaywrightLocal {
1021
1028
  window.__akClickPulse(px, py);
1022
1029
  }, { px: Math.round(x), py: Math.round(y) }).catch(() => { });
1023
1030
  }
1031
+ /**
1032
+ * Drain the browser-side `__akClickAt` buffer, replay every timestamp at
1033
+ * or after `sinceMs` through `onClick`, and clear the buffer so the next
1034
+ * click action starts fresh. This is what makes mouse SFX line up
1035
+ * exactly with the visual click in the recorded video — the mousedown
1036
+ * listener inside the cursor overlay timestamps each click at the same
1037
+ * instant the browser dispatches it, which is also the frame the CDP
1038
+ * screencast captures.
1039
+ *
1040
+ * Calls `onClick(sinceMs)` once (Node wall-clock at action dispatch) when
1041
+ * no fresh timestamp is found: empty or missing buffer, or a failed
1042
+ * page.evaluate. No-op (buffer left untouched) when `onClick` is unset.
1043
+ */
1044
+ async reportClickSfxTimestamps(page, sinceMs, onClick) {
1045
+ if (!onClick)
1046
+ return;
1047
+ let captured = [];
1048
+ try {
1049
+ captured = (await page.evaluate((cutoff) => {
1050
+ const buf = window.__akClickAt;
1051
+ if (!Array.isArray(buf))
1052
+ return [];
1053
+ const fresh = buf.filter((t) => typeof t === 'number' && t >= cutoff);
1054
+ // Clear the buffer so successive click actions don't see each
1055
+ // other's timestamps. We mutate the array in place to keep the
1056
+ // reference stable for any other consumers (none today).
1057
+ buf.length = 0;
1058
+ return fresh;
1059
+ }, sinceMs));
1060
+ }
1061
+ catch {
1062
+ captured = [];
1063
+ }
1064
+ if (captured.length > 0) {
1065
+ for (const t of captured)
1066
+ onClick(t);
1067
+ }
1068
+ else {
1069
+ onClick(sinceMs);
1070
+ }
1071
+ }
1024
1072
  }
1025
1073
  function describeResolveOptions(opts) {
1026
1074
  const parts = [];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autokap",
3
- "version": "1.5.3",
3
+ "version": "1.5.5",
4
4
  "description": "AI-powered CLI tool for capturing clean screenshots of websites",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",