autokap 1.2.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/skill/OPCODE-REFERENCE.md +18 -1
- package/assets/skill/SKILL.md +54 -11
- package/dist/browser.js +23 -1
- package/dist/capture-strategy.d.ts +14 -0
- package/dist/capture-strategy.js +28 -0
- package/dist/cli-contract.d.ts +61 -0
- package/dist/cli-runner.d.ts +10 -1
- package/dist/cli-runner.js +415 -20
- package/dist/cli.js +124 -2
- package/dist/clip-capture-loop.js +11 -2
- package/dist/cookie-dismiss.d.ts +1 -0
- package/dist/cookie-dismiss.js +13 -1
- package/dist/execution-schema.d.ts +303 -2
- package/dist/execution-schema.js +77 -4
- package/dist/execution-types.d.ts +114 -5
- package/dist/execution-types.js +2 -1
- package/dist/index.d.ts +5 -1
- package/dist/index.js +2 -0
- package/dist/mouse-animation.d.ts +12 -2
- package/dist/mouse-animation.js +36 -6
- package/dist/opcode-actions.d.ts +2 -0
- package/dist/opcode-actions.js +39 -5
- package/dist/opcode-runner.d.ts +2 -0
- package/dist/opcode-runner.js +139 -17
- package/dist/openrouter-tts.d.ts +74 -0
- package/dist/openrouter-tts.js +218 -0
- package/dist/postcondition.js +36 -26
- package/dist/program-signing.d.ts +67 -0
- package/dist/recovery-chain.js +26 -12
- package/dist/server-credit-usage.d.ts +1 -1
- package/dist/video-narration-schema.d.ts +1165 -0
- package/dist/video-narration-schema.js +137 -0
- package/dist/web-playwright-local.d.ts +16 -0
- package/dist/web-playwright-local.js +204 -18
- package/package.json +9 -1
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AUT-57 — Video narration overlay schema.
|
|
3
|
+
*
|
|
4
|
+
* The opcode-based video demo feature stores a regular `ExecutionProgram`
|
|
5
|
+
* in a preset config. Narration is authored on `SLEEP` opcodes via stable
|
|
6
|
+
* `stepId` anchors plus `narrationTextByLocale`; `autokap run` extracts those
|
|
7
|
+
* anchors into this overlay shape, renders TTS, then persists the overlay and
|
|
8
|
+
* audio assets under `presets.config.videoDemo`.
|
|
9
|
+
*/
|
|
10
|
+
import { z } from 'zod';
|
|
11
|
+
import { ExecutionProgramSchema } from './execution-schema.js';
|
|
12
|
+
export const SUPPORTED_TTS_LOCALES = ['en', 'fr'];
|
|
13
|
+
export function normalizeLocaleTag(locale) {
|
|
14
|
+
return locale.trim().replace(/_/g, '-');
|
|
15
|
+
}
|
|
16
|
+
export function primaryLocaleSubtag(locale) {
|
|
17
|
+
return normalizeLocaleTag(locale).split('-')[0]?.toLowerCase() ?? '';
|
|
18
|
+
}
|
|
19
|
+
export function resolveSupportedTtsLocale(locale) {
|
|
20
|
+
const primary = primaryLocaleSubtag(locale);
|
|
21
|
+
return SUPPORTED_TTS_LOCALES.includes(primary)
|
|
22
|
+
? primary
|
|
23
|
+
: null;
|
|
24
|
+
}
|
|
25
|
+
/** `stepId` constraint shared with `OpcodeBase` — kebab-case, 1..64 chars. */
|
|
26
|
+
const NarrationStepIdSchema = z.string()
|
|
27
|
+
.regex(/^[a-z0-9-]+$/, {
|
|
28
|
+
message: 'stepId must match /^[a-z0-9-]+$/ (lowercase kebab-case)',
|
|
29
|
+
})
|
|
30
|
+
.min(1)
|
|
31
|
+
.max(64);
|
|
32
|
+
/**
|
|
33
|
+
* One narration segment anchored to a single opcode `stepId`. The TTS provider
|
|
34
|
+
* receives `text` verbatim; `estimated_duration_ms` is the assistant's guess
|
|
35
|
+
* (used for diagnostics, not authoritative — the run-time TTS step
|
|
36
|
+
* (PR #4) measures the real audio length via ffprobe).
|
|
37
|
+
*/
|
|
38
|
+
export const NarrationSegmentSchema = z.object({
|
|
39
|
+
stepId: NarrationStepIdSchema,
|
|
40
|
+
text: z.string().min(1).max(2000),
|
|
41
|
+
estimated_duration_ms: z.number().int().positive().max(60_000),
|
|
42
|
+
}).strict();
|
|
43
|
+
/**
|
|
44
|
+
* Locale-aware narration overlay. `voice` is an opaque provider-side handle
|
|
45
|
+
* (e.g. `openai/gpt-4o-mini-tts:nova`); the backend validates it against the
|
|
46
|
+
* cached OpenRouter voice list (PR #3+).
|
|
47
|
+
*
|
|
48
|
+
* TTS support is intentionally catalog-gated: app locales can be arbitrary,
|
|
49
|
+
* but narration locales must currently resolve to `en` or `fr`.
|
|
50
|
+
*/
|
|
51
|
+
export const VideoNarrationOverlaySchema = z.object({
|
|
52
|
+
voice: z.string().min(1).max(128),
|
|
53
|
+
locale: z.string().min(2).max(16),
|
|
54
|
+
segments: z.array(NarrationSegmentSchema).min(1).max(200),
|
|
55
|
+
}).strict().superRefine((value, ctx) => {
|
|
56
|
+
if (!resolveSupportedTtsLocale(value.locale)) {
|
|
57
|
+
ctx.addIssue({
|
|
58
|
+
code: z.ZodIssueCode.custom,
|
|
59
|
+
path: ['locale'],
|
|
60
|
+
message: `unsupported TTS locale "${value.locale}" — supported locales: ${SUPPORTED_TTS_LOCALES.join(', ')}`,
|
|
61
|
+
});
|
|
62
|
+
}
|
|
63
|
+
const seen = new Set();
|
|
64
|
+
for (const [index, seg] of value.segments.entries()) {
|
|
65
|
+
if (seen.has(seg.stepId)) {
|
|
66
|
+
ctx.addIssue({
|
|
67
|
+
code: z.ZodIssueCode.custom,
|
|
68
|
+
path: ['segments', index, 'stepId'],
|
|
69
|
+
message: `duplicate stepId in narration: "${seg.stepId}"`,
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
seen.add(seg.stepId);
|
|
73
|
+
}
|
|
74
|
+
});
|
|
75
|
+
/**
|
|
76
|
+
* Legacy ingest payload shape retained for compatibility tests. The current
|
|
77
|
+
* demo-video flow persists the program as `presets.config.program`; narration
|
|
78
|
+
* is extracted from `SLEEP.narrationTextByLocale` during `autokap run`.
|
|
79
|
+
*
|
|
80
|
+
* Cross-validation:
|
|
81
|
+
* 1. `program.mediaMode === 'video'`.
|
|
82
|
+
* 2. When `narration` is present, every `narration.segments[*].stepId` must
|
|
83
|
+
* match a `program.steps[*].stepId`.
|
|
84
|
+
*/
|
|
85
|
+
export const VideoIngestPayloadSchema = z.object({
|
|
86
|
+
program: ExecutionProgramSchema,
|
|
87
|
+
narration: VideoNarrationOverlaySchema.optional(),
|
|
88
|
+
}).strict().superRefine((value, ctx) => {
|
|
89
|
+
if (value.program.mediaMode !== 'video') {
|
|
90
|
+
ctx.addIssue({
|
|
91
|
+
code: z.ZodIssueCode.custom,
|
|
92
|
+
path: ['program', 'mediaMode'],
|
|
93
|
+
message: `ingest payload requires program.mediaMode='video', got '${value.program.mediaMode}'`,
|
|
94
|
+
});
|
|
95
|
+
return;
|
|
96
|
+
}
|
|
97
|
+
if (!value.narration)
|
|
98
|
+
return;
|
|
99
|
+
const programStepIds = new Set();
|
|
100
|
+
// value.program here is `unknown`-shaped from Zod's perspective at refine
|
|
101
|
+
// time, but the ExecutionProgramSchema has already passed by the time we
|
|
102
|
+
// run — narrowing via cast is safe.
|
|
103
|
+
for (const op of value.program.steps) {
|
|
104
|
+
if (typeof op.stepId === 'string') {
|
|
105
|
+
programStepIds.add(op.stepId);
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
for (const [index, seg] of value.narration.segments.entries()) {
|
|
109
|
+
if (!programStepIds.has(seg.stepId)) {
|
|
110
|
+
ctx.addIssue({
|
|
111
|
+
code: z.ZodIssueCode.custom,
|
|
112
|
+
path: ['narration', 'segments', index, 'stepId'],
|
|
113
|
+
message: `narration references unknown stepId "${seg.stepId}" — no opcode in program.steps carries that stepId`,
|
|
114
|
+
});
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
});
|
|
118
|
+
/** Parse an ingest payload, throwing a ZodError on invalid input. */
|
|
119
|
+
export function parseVideoIngestPayload(data) {
|
|
120
|
+
return VideoIngestPayloadSchema.parse(data);
|
|
121
|
+
}
|
|
122
|
+
/** Status state machine values for `videos.status` (opcode flow). */
|
|
123
|
+
export const VIDEO_STATUSES = [
|
|
124
|
+
'draft',
|
|
125
|
+
'preparing',
|
|
126
|
+
'ready_to_run',
|
|
127
|
+
'running',
|
|
128
|
+
'uploading',
|
|
129
|
+
'compositing',
|
|
130
|
+
'done',
|
|
131
|
+
'failed',
|
|
132
|
+
];
|
|
133
|
+
/** Discriminator value for opcode-flow rows in the `videos` table. */
|
|
134
|
+
export const VIDEO_FLOW_KIND_OPCODE = 'opcode_video';
|
|
135
|
+
/** Discriminator value for legacy planner rows (migration 020 flow). */
|
|
136
|
+
export const VIDEO_FLOW_KIND_LEGACY = 'legacy_planner';
|
|
137
|
+
//# sourceMappingURL=video-narration-schema.js.map
|
|
@@ -14,6 +14,16 @@ export declare class WebPlaywrightLocal implements RuntimeAdapter {
|
|
|
14
14
|
private readonly sessionStartedAt;
|
|
15
15
|
private recording;
|
|
16
16
|
private clipCursor;
|
|
17
|
+
/**
|
|
18
|
+
* Flags / hooks that surface mid-recording navigations in CLI logs. When a
|
|
19
|
+
* navigation (NAVIGATE opcode, SET_LOCALE storage reload) is initiated by
|
|
20
|
+
* the runner, `expectedNavigationCount` is bumped before the call and
|
|
21
|
+
* decremented when the framenavigated handler observes it — so any nav that
|
|
22
|
+
* fires WITHOUT a pending count is logged as UNEXPECTED (likely page-side
|
|
23
|
+
* `location.reload()`, session redirect, or HMR). `detach()` removes the
|
|
24
|
+
* listeners when recording ends.
|
|
25
|
+
*/
|
|
26
|
+
private recordingNavWatcher;
|
|
17
27
|
constructor(browser: Browser, recordingDir?: string | undefined);
|
|
18
28
|
navigate(url: string): Promise<void>;
|
|
19
29
|
getCurrentUrl(): Promise<string>;
|
|
@@ -50,6 +60,12 @@ export declare class WebPlaywrightLocal implements RuntimeAdapter {
|
|
|
50
60
|
takeElementScreenshot(selector: string): Promise<Buffer>;
|
|
51
61
|
takeCleanScreenshot(): Promise<Buffer>;
|
|
52
62
|
beginRecording(options: RecordingOptions): Promise<void>;
|
|
63
|
+
getElementBoundingBox(selector: string): Promise<{
|
|
64
|
+
x: number;
|
|
65
|
+
y: number;
|
|
66
|
+
width: number;
|
|
67
|
+
height: number;
|
|
68
|
+
} | null>;
|
|
53
69
|
endRecording(): Promise<RecordingResult>;
|
|
54
70
|
setLocale(locale: string): Promise<void>;
|
|
55
71
|
setColorScheme(scheme: 'light' | 'dark'): Promise<void>;
|
|
@@ -7,23 +7,60 @@
|
|
|
7
7
|
import fs from 'node:fs/promises';
|
|
8
8
|
import os from 'node:os';
|
|
9
9
|
import path from 'node:path';
|
|
10
|
-
import { humanType, moveMouse } from './mouse-animation.js';
|
|
10
|
+
import { humanType, moveMouse, } from './mouse-animation.js';
|
|
11
11
|
import { resolveTarget } from './semantic-resolver.js';
|
|
12
12
|
import { logger } from './logger.js';
|
|
13
13
|
import { ClipCaptureLoop } from './clip-capture-loop.js';
|
|
14
|
-
import { assembleMp4FromFrames } from './clip-postprocess.js';
|
|
14
|
+
import { assembleMp4FromFrames, getMediaDurationMs } from './clip-postprocess.js';
|
|
15
15
|
export class WebPlaywrightLocal {
|
|
16
16
|
browser;
|
|
17
17
|
recordingDir;
|
|
18
18
|
sessionStartedAt = Date.now();
|
|
19
19
|
recording = null;
|
|
20
20
|
clipCursor = null;
|
|
21
|
+
/**
|
|
22
|
+
* Flags / hooks that surface mid-recording navigations in CLI logs. When a
|
|
23
|
+
* navigation (NAVIGATE opcode, SET_LOCALE storage reload) is initiated by
|
|
24
|
+
* the runner, `expectedNavigationCount` is bumped before the call and
|
|
25
|
+
* decremented when the framenavigated handler observes it — so any nav that
|
|
26
|
+
* fires WITHOUT a pending count is logged as UNEXPECTED (likely page-side
|
|
27
|
+
* `location.reload()`, session redirect, or HMR). `detach()` removes the
|
|
28
|
+
* listeners when recording ends.
|
|
29
|
+
*/
|
|
30
|
+
recordingNavWatcher = null;
|
|
21
31
|
constructor(browser, recordingDir) {
|
|
22
32
|
this.browser = browser;
|
|
23
33
|
this.recordingDir = recordingDir;
|
|
24
34
|
}
|
|
25
35
|
async navigate(url) {
|
|
26
|
-
|
|
36
|
+
// Snapshot the cursor position BEFORE navigating — `page.goto` replaces
|
|
37
|
+
// the document and wipes the `<div id="__ak_cursor__">` overlay element.
|
|
38
|
+
// The cursor init script is re-run on the new document but the cursor
|
|
39
|
+
// starts off-screen until the next interaction. During a long SLEEP
|
|
40
|
+
// narration that follows a NAVIGATE, no cursor would be visible at all.
|
|
41
|
+
const lastCursorPosition = this.recording && this.clipCursor
|
|
42
|
+
? this.clipCursor.currentPosition
|
|
43
|
+
: null;
|
|
44
|
+
if (this.recordingNavWatcher)
|
|
45
|
+
this.recordingNavWatcher.expectedNavigationCount += 1;
|
|
46
|
+
try {
|
|
47
|
+
await this.browser.navigateTo(url);
|
|
48
|
+
}
|
|
49
|
+
finally {
|
|
50
|
+
// Drop the budget after a brief settle window — Playwright's framenavigated
|
|
51
|
+
// for the load can fire after navigateTo's promise resolves on slow apps.
|
|
52
|
+
if (this.recordingNavWatcher) {
|
|
53
|
+
const watcher = this.recordingNavWatcher;
|
|
54
|
+
setTimeout(() => {
|
|
55
|
+
if (watcher === this.recordingNavWatcher) {
|
|
56
|
+
watcher.expectedNavigationCount = Math.max(0, watcher.expectedNavigationCount - 1);
|
|
57
|
+
}
|
|
58
|
+
}, 500);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
if (this.recording && this.clipCursor) {
|
|
62
|
+
await this.seedClipCursor(lastCursorPosition ?? undefined);
|
|
63
|
+
}
|
|
27
64
|
}
|
|
28
65
|
async getCurrentUrl() {
|
|
29
66
|
const page = await this.browser.currentPage;
|
|
@@ -273,16 +310,55 @@ export class WebPlaywrightLocal {
|
|
|
273
310
|
return this.browser.takeScreenshot();
|
|
274
311
|
}
|
|
275
312
|
async beginRecording(options) {
|
|
276
|
-
|
|
313
|
+
let page = await this.browser.currentPage;
|
|
314
|
+
if (options.captureResolution) {
|
|
315
|
+
const alreadyMatchesCaptureSurface = options.mediaMode === 'video'
|
|
316
|
+
? await captureSurfaceMatches(page, options.captureResolution)
|
|
317
|
+
: false;
|
|
318
|
+
if (!alreadyMatchesCaptureSurface) {
|
|
319
|
+
await this.browser.resizeViewport(options.captureResolution.width, options.captureResolution.height);
|
|
320
|
+
page = await this.browser.currentPage;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
// Surface diagnostic — captures what frames will actually be at, so a
|
|
324
|
+
// mismatch between the configured 1920×1080 and the rendered surface is
|
|
325
|
+
// visible in CLI logs without instrumenting the user's machine.
|
|
326
|
+
try {
|
|
327
|
+
const surfaceCheck = await page.evaluate(() => ({
|
|
328
|
+
innerWidth: window.innerWidth,
|
|
329
|
+
innerHeight: window.innerHeight,
|
|
330
|
+
dpr: window.devicePixelRatio,
|
|
331
|
+
}));
|
|
332
|
+
const capturedW = Math.round(surfaceCheck.innerWidth * surfaceCheck.dpr);
|
|
333
|
+
const capturedH = Math.round(surfaceCheck.innerHeight * surfaceCheck.dpr);
|
|
334
|
+
logger.info(`[capture] Recording surface: CSS ${surfaceCheck.innerWidth}×${surfaceCheck.innerHeight} ` +
|
|
335
|
+
`@DPR=${surfaceCheck.dpr} → captured frames ${capturedW}×${capturedH} ` +
|
|
336
|
+
`(requested ${options.captureResolution?.width ?? 'auto'}×${options.captureResolution?.height ?? 'auto'})`);
|
|
337
|
+
}
|
|
338
|
+
catch {
|
|
339
|
+
// Best-effort diagnostic — non-fatal.
|
|
340
|
+
}
|
|
277
341
|
const baseDir = this.recordingDir
|
|
278
342
|
?? await fs.mkdtemp(path.join(os.tmpdir(), 'autokap-recording-'));
|
|
279
343
|
const framesDir = path.join(baseDir, 'frames');
|
|
280
344
|
await fs.mkdir(framesDir, { recursive: true });
|
|
345
|
+
// Linux defaults are conservative because GitHub Actions free runners
|
|
346
|
+
// (2 vCPU) can't sustain 30 fps. AUTOKAP_CLOUD_RUNNER=1 signals that the
|
|
347
|
+
// process is running on managed cloud infra (Fly.io machines, ≥4 vCPU)
|
|
348
|
+
// where the cap can safely lift to 30 fps for clips too. Set by the
|
|
349
|
+
// `--cloud` flag and by the cloud-runner Docker image.
|
|
350
|
+
const isCloudRunner = process.env.AUTOKAP_CLOUD_RUNNER === '1';
|
|
351
|
+
const defaultFps = process.platform === 'linux' && !isCloudRunner ? 8 : 15;
|
|
352
|
+
const cloudClipFps = isCloudRunner ? 30 : defaultFps;
|
|
353
|
+
const targetFps = options.captureFps
|
|
354
|
+
?? (options.mediaMode === 'video' ? 30 : cloudClipFps);
|
|
281
355
|
const loop = new ClipCaptureLoop({
|
|
282
356
|
page,
|
|
283
357
|
framesDir,
|
|
284
|
-
targetFps
|
|
285
|
-
|
|
358
|
+
targetFps,
|
|
359
|
+
// Cloud runners have CPU headroom — drop the Linux 50 ms idle cushion
|
|
360
|
+
// (sized for tight CI runners) to let the loop stay close to its target.
|
|
361
|
+
minRestMs: process.platform === 'linux' && !isCloudRunner ? 50 : 16,
|
|
286
362
|
});
|
|
287
363
|
await loop.start();
|
|
288
364
|
this.recording = {
|
|
@@ -293,22 +369,97 @@ export class WebPlaywrightLocal {
|
|
|
293
369
|
loop,
|
|
294
370
|
finalized: false,
|
|
295
371
|
};
|
|
296
|
-
this.clipCursor = {
|
|
372
|
+
this.clipCursor = {
|
|
373
|
+
currentPosition: null,
|
|
374
|
+
pace: options.mediaMode === 'video' ? 'natural' : 'fast',
|
|
375
|
+
};
|
|
376
|
+
// Attach navigation/error watchers for the recording window. These surface
|
|
377
|
+
// the cause of mid-recording refreshes (cursor disappears, "robot" effect)
|
|
378
|
+
// that the existing reload guards don't catch — page-side `location.reload()`,
|
|
379
|
+
// session redirects, Next.js HMR, soft router refreshes.
|
|
380
|
+
const recordingStartedAt = Date.now();
|
|
381
|
+
let lastUrl = page.url();
|
|
382
|
+
const navHandler = (frame) => {
|
|
383
|
+
if (frame !== page.mainFrame())
|
|
384
|
+
return;
|
|
385
|
+
const elapsedMs = Date.now() - recordingStartedAt;
|
|
386
|
+
const url = frame.url();
|
|
387
|
+
const sameUrl = url === lastUrl;
|
|
388
|
+
lastUrl = url;
|
|
389
|
+
const watcher = this.recordingNavWatcher;
|
|
390
|
+
const expected = watcher && watcher.expectedNavigationCount > 0;
|
|
391
|
+
if (expected && watcher) {
|
|
392
|
+
watcher.expectedNavigationCount = Math.max(0, watcher.expectedNavigationCount - 1);
|
|
393
|
+
logger.info(`[capture] [recording nav +${elapsedMs}ms expected] → ${url}`);
|
|
394
|
+
}
|
|
395
|
+
else {
|
|
396
|
+
logger.warn(`[capture] [recording nav +${elapsedMs}ms framenavigated ${sameUrl ? 'same-url' : 'cross-url'}] → ${url}`);
|
|
397
|
+
}
|
|
398
|
+
};
|
|
399
|
+
// `load` ONLY fires on full document loads (page.goto, location.reload).
|
|
400
|
+
// SPA URL changes via pushState/popstate do NOT fire load. So if we see
|
|
401
|
+
// load events during recording that aren't tied to an opcode in flight,
|
|
402
|
+
// the document is being replaced — that's the white-flash + cursor-loss
|
|
403
|
+
// pattern. Surface it loudly so the cause (page-side reload, session
|
|
404
|
+
// redirect, HMR, mis-shaped NAVIGATE opcode) can be diagnosed quickly.
|
|
405
|
+
const loadHandler = () => {
|
|
406
|
+
const elapsedMs = Date.now() - recordingStartedAt;
|
|
407
|
+
logger.warn(`[capture] [recording document LOAD +${elapsedMs}ms] ${page.url()} — full document load (white flash + cursor loss)`);
|
|
408
|
+
};
|
|
409
|
+
const errorHandler = (err) => {
|
|
410
|
+
logger.warn(`[capture] [recording page error] ${err.message.split('\n')[0]}`);
|
|
411
|
+
};
|
|
412
|
+
page.on('framenavigated', navHandler);
|
|
413
|
+
page.on('load', loadHandler);
|
|
414
|
+
page.on('pageerror', errorHandler);
|
|
415
|
+
this.recordingNavWatcher = {
|
|
416
|
+
startedAt: recordingStartedAt,
|
|
417
|
+
expectedNavigationCount: 0,
|
|
418
|
+
detach: () => {
|
|
419
|
+
try {
|
|
420
|
+
page.off('framenavigated', navHandler);
|
|
421
|
+
}
|
|
422
|
+
catch { /* ignore */ }
|
|
423
|
+
try {
|
|
424
|
+
page.off('load', loadHandler);
|
|
425
|
+
}
|
|
426
|
+
catch { /* ignore */ }
|
|
427
|
+
try {
|
|
428
|
+
page.off('pageerror', errorHandler);
|
|
429
|
+
}
|
|
430
|
+
catch { /* ignore */ }
|
|
431
|
+
},
|
|
432
|
+
};
|
|
297
433
|
await this.seedClipCursor();
|
|
298
434
|
}
|
|
435
|
+
async getElementBoundingBox(selector) {
|
|
436
|
+
const page = await this.browser.currentPage;
|
|
437
|
+
try {
|
|
438
|
+
return await page.locator(selector).first().boundingBox();
|
|
439
|
+
}
|
|
440
|
+
catch {
|
|
441
|
+
return null;
|
|
442
|
+
}
|
|
443
|
+
}
|
|
299
444
|
async endRecording() {
|
|
300
445
|
if (!this.recording) {
|
|
301
446
|
throw new Error('recording was not started');
|
|
302
447
|
}
|
|
303
448
|
if (this.recording.finalized) {
|
|
304
449
|
const buffer = await fs.readFile(this.recording.mp4Path);
|
|
450
|
+
const durationMs = this.recording.encodedDurationMs ?? await getMediaDurationMs(this.recording.mp4Path);
|
|
451
|
+
this.recording.encodedDurationMs = durationMs;
|
|
305
452
|
return {
|
|
306
453
|
buffer,
|
|
307
|
-
durationMs
|
|
454
|
+
durationMs,
|
|
308
455
|
mimeType: 'video/mp4',
|
|
309
456
|
trimStartMs: this.recording.result?.trimStartMs ?? 0,
|
|
310
457
|
};
|
|
311
458
|
}
|
|
459
|
+
if (this.recordingNavWatcher) {
|
|
460
|
+
this.recordingNavWatcher.detach();
|
|
461
|
+
this.recordingNavWatcher = null;
|
|
462
|
+
}
|
|
312
463
|
const result = await this.recording.loop.stop();
|
|
313
464
|
logger.info(`[capture] Clip frame capture: ${result.frameCount} frame(s), ` +
|
|
314
465
|
`${result.measuredFps.toFixed(1)} fps over ${(result.actualDurationMs / 1000).toFixed(2)}s ` +
|
|
@@ -324,11 +475,12 @@ export class WebPlaywrightLocal {
|
|
|
324
475
|
});
|
|
325
476
|
this.recording.finalized = true;
|
|
326
477
|
this.recording.result = result;
|
|
478
|
+
this.recording.encodedDurationMs = await getMediaDurationMs(this.recording.mp4Path);
|
|
327
479
|
this.clipCursor = null;
|
|
328
480
|
const buffer = await fs.readFile(this.recording.mp4Path);
|
|
329
481
|
return {
|
|
330
482
|
buffer,
|
|
331
|
-
durationMs:
|
|
483
|
+
durationMs: this.recording.encodedDurationMs,
|
|
332
484
|
mimeType: 'video/mp4',
|
|
333
485
|
trimStartMs: result.trimStartMs,
|
|
334
486
|
};
|
|
@@ -340,7 +492,21 @@ export class WebPlaywrightLocal {
|
|
|
340
492
|
await this.browser.setColorScheme(scheme);
|
|
341
493
|
}
|
|
342
494
|
async reloadPage() {
|
|
343
|
-
|
|
495
|
+
if (this.recordingNavWatcher)
|
|
496
|
+
this.recordingNavWatcher.expectedNavigationCount += 1;
|
|
497
|
+
try {
|
|
498
|
+
await this.browser.reloadCurrentPage();
|
|
499
|
+
}
|
|
500
|
+
finally {
|
|
501
|
+
if (this.recordingNavWatcher) {
|
|
502
|
+
const watcher = this.recordingNavWatcher;
|
|
503
|
+
setTimeout(() => {
|
|
504
|
+
if (watcher === this.recordingNavWatcher) {
|
|
505
|
+
watcher.expectedNavigationCount = Math.max(0, watcher.expectedNavigationCount - 1);
|
|
506
|
+
}
|
|
507
|
+
}, 500);
|
|
508
|
+
}
|
|
509
|
+
}
|
|
344
510
|
}
|
|
345
511
|
async writeStorageHint(params) {
|
|
346
512
|
if (params.storage === 'cookie') {
|
|
@@ -494,7 +660,10 @@ export class WebPlaywrightLocal {
|
|
|
494
660
|
}
|
|
495
661
|
}).catch(() => { });
|
|
496
662
|
}
|
|
497
|
-
await moveMouse(page, sourcePoint, destPoint, {
|
|
663
|
+
await moveMouse(page, sourcePoint, destPoint, {
|
|
664
|
+
durationMs: dragDurationMs,
|
|
665
|
+
...(this.clipCursor ? { pace: this.clipCursor.pace } : {}),
|
|
666
|
+
});
|
|
498
667
|
await page.waitForTimeout(70 + Math.random() * 60);
|
|
499
668
|
await page.mouse.up();
|
|
500
669
|
if (this.clipCursor) {
|
|
@@ -663,21 +832,25 @@ export class WebPlaywrightLocal {
|
|
|
663
832
|
? { minDelayMs: 20, maxDelayMs: 45 }
|
|
664
833
|
: undefined);
|
|
665
834
|
}
|
|
666
|
-
async seedClipCursor() {
|
|
835
|
+
async seedClipCursor(position) {
|
|
667
836
|
if (!this.clipCursor)
|
|
668
837
|
return;
|
|
669
838
|
const page = await this.browser.currentPage;
|
|
670
839
|
const viewport = page.viewportSize();
|
|
671
840
|
if (!viewport)
|
|
672
841
|
return;
|
|
673
|
-
const
|
|
674
|
-
|
|
675
|
-
|
|
842
|
+
const targetX = position
|
|
843
|
+
? Math.max(0, Math.min(viewport.width, Math.round(position.x)))
|
|
844
|
+
: Math.round(viewport.width * (0.3 + Math.random() * 0.4));
|
|
845
|
+
const targetY = position
|
|
846
|
+
? Math.max(0, Math.min(viewport.height, Math.round(position.y)))
|
|
847
|
+
: Math.round(viewport.height * (0.3 + Math.random() * 0.4));
|
|
848
|
+
await page.mouse.move(targetX, targetY);
|
|
676
849
|
await page.evaluate(({ x, y }) => {
|
|
677
850
|
if (typeof window.__akMoveCursor === 'function')
|
|
678
851
|
window.__akMoveCursor(x, y);
|
|
679
|
-
}, { x:
|
|
680
|
-
this.clipCursor.currentPosition = { x:
|
|
852
|
+
}, { x: targetX, y: targetY }).catch(() => { });
|
|
853
|
+
this.clipCursor.currentPosition = { x: targetX, y: targetY };
|
|
681
854
|
await page.waitForTimeout(60);
|
|
682
855
|
}
|
|
683
856
|
async moveClipCursorToViewportCenter() {
|
|
@@ -748,7 +921,7 @@ export class WebPlaywrightLocal {
|
|
|
748
921
|
const page = await this.browser.currentPage;
|
|
749
922
|
const from = this.clipCursor.currentPosition;
|
|
750
923
|
if (from) {
|
|
751
|
-
await moveMouse(page, from, point, options);
|
|
924
|
+
await moveMouse(page, from, point, { ...options, pace: this.clipCursor.pace });
|
|
752
925
|
}
|
|
753
926
|
else {
|
|
754
927
|
await page.mouse.move(point.x, point.y);
|
|
@@ -784,6 +957,19 @@ function describeResolveOptions(opts) {
|
|
|
784
957
|
parts.push(`placeholder="${opts.target.placeholder}"`);
|
|
785
958
|
return parts.join(', ') || 'no target specified';
|
|
786
959
|
}
|
|
960
|
+
async function captureSurfaceMatches(page, expected) {
|
|
961
|
+
try {
|
|
962
|
+
const actual = await page.evaluate(() => ({
|
|
963
|
+
width: Math.round(window.innerWidth * window.devicePixelRatio),
|
|
964
|
+
height: Math.round(window.innerHeight * window.devicePixelRatio),
|
|
965
|
+
}));
|
|
966
|
+
return Math.abs(actual.width - expected.width) <= 2
|
|
967
|
+
&& Math.abs(actual.height - expected.height) <= 2;
|
|
968
|
+
}
|
|
969
|
+
catch {
|
|
970
|
+
return false;
|
|
971
|
+
}
|
|
972
|
+
}
|
|
787
973
|
function getHumanPointInBox(box, viewport) {
|
|
788
974
|
const insetX = Math.min(Math.max(box.width * 0.2, 6), Math.max(6, box.width / 2));
|
|
789
975
|
const insetY = Math.min(Math.max(box.height * 0.2, 6), Math.max(6, box.height / 2));
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "autokap",
|
|
3
|
-
"version": "1.2
|
|
3
|
+
"version": "1.3.2",
|
|
4
4
|
"description": "AI-powered CLI tool for capturing clean screenshots of websites",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -34,6 +34,14 @@
|
|
|
34
34
|
"types": "./dist/execution-types.d.ts",
|
|
35
35
|
"default": "./dist/execution-types.js"
|
|
36
36
|
},
|
|
37
|
+
"./video-narration-schema": {
|
|
38
|
+
"types": "./dist/video-narration-schema.d.ts",
|
|
39
|
+
"default": "./dist/video-narration-schema.js"
|
|
40
|
+
},
|
|
41
|
+
"./openrouter-tts": {
|
|
42
|
+
"types": "./dist/openrouter-tts.d.ts",
|
|
43
|
+
"default": "./dist/openrouter-tts.js"
|
|
44
|
+
},
|
|
37
45
|
"./alt-text": {
|
|
38
46
|
"types": "./dist/alt-text.d.ts",
|
|
39
47
|
"default": "./dist/alt-text.js"
|