autokap 1.1.8 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/skill/OPCODE-REFERENCE.md +18 -1
- package/assets/skill/SKILL.md +54 -11
- package/dist/browser.js +23 -1
- package/dist/capture-strategy.d.ts +14 -0
- package/dist/capture-strategy.js +28 -0
- package/dist/cli-contract.d.ts +61 -0
- package/dist/cli-runner.d.ts +10 -1
- package/dist/cli-runner.js +415 -20
- package/dist/cli.js +80 -0
- package/dist/clip-capture-loop.js +11 -2
- package/dist/cookie-dismiss.d.ts +1 -0
- package/dist/cookie-dismiss.js +13 -1
- package/dist/execution-schema.d.ts +303 -2
- package/dist/execution-schema.js +77 -4
- package/dist/execution-types.d.ts +114 -5
- package/dist/execution-types.js +2 -1
- package/dist/index.d.ts +5 -1
- package/dist/index.js +2 -0
- package/dist/mouse-animation.d.ts +12 -2
- package/dist/mouse-animation.js +36 -6
- package/dist/opcode-actions.d.ts +2 -0
- package/dist/opcode-actions.js +39 -5
- package/dist/opcode-runner.d.ts +2 -0
- package/dist/opcode-runner.js +139 -17
- package/dist/openrouter-tts.d.ts +74 -0
- package/dist/openrouter-tts.js +218 -0
- package/dist/postcondition.js +36 -26
- package/dist/program-signing.d.ts +67 -0
- package/dist/recovery-chain.js +26 -12
- package/dist/server-credit-usage.d.ts +1 -1
- package/dist/video-narration-schema.d.ts +1165 -0
- package/dist/video-narration-schema.js +137 -0
- package/dist/web-playwright-local.d.ts +16 -0
- package/dist/web-playwright-local.js +204 -18
- package/package.json +9 -1
package/dist/execution-schema.js
CHANGED
|
@@ -53,12 +53,24 @@ export const RecoveryPolicySchema = z.object({
|
|
|
53
53
|
allowHealer: z.boolean(),
|
|
54
54
|
}).strict();
|
|
55
55
|
// ── Opcode base fields ──────────────────────────────────────────────
|
|
56
|
+
/**
|
|
57
|
+
* Stable identifier used to anchor narration overlays in `mediaMode='video'`
|
|
58
|
+
* runs. Constraints (per AUT-57 R8 mitigation):
|
|
59
|
+
* - kebab-case lowercase ASCII: `[a-z0-9-]+`
|
|
60
|
+
* - 1..64 characters
|
|
61
|
+
* - optional everywhere; required only on opcodes whose narration must
|
|
62
|
+
* survive program rewrites during run-time speech generation.
|
|
63
|
+
*/
|
|
64
|
+
const StepIdSchema = z.string().regex(/^[a-z0-9-]+$/, {
|
|
65
|
+
message: 'stepId must match /^[a-z0-9-]+$/ (lowercase kebab-case)',
|
|
66
|
+
}).min(1).max(64);
|
|
56
67
|
const opcodeBase = {
|
|
57
68
|
description: z.string().min(1),
|
|
58
69
|
postcondition: PostconditionSpecSchema,
|
|
59
70
|
recovery: RecoveryPolicySchema,
|
|
60
71
|
timeoutMs: z.number().int().positive(),
|
|
61
72
|
maxFailures: z.number().int().positive(),
|
|
73
|
+
stepId: StepIdSchema.optional(),
|
|
62
74
|
};
|
|
63
75
|
/**
|
|
64
76
|
* Strict URL validator: rejects schemes without a host (e.g. `"https://"`),
|
|
@@ -145,6 +157,32 @@ const WaitForOpcodeSchema = z.object({
|
|
|
145
157
|
});
|
|
146
158
|
}
|
|
147
159
|
});
|
|
160
|
+
const SleepOpcodeSchema = z.object({
|
|
161
|
+
kind: z.literal('SLEEP'),
|
|
162
|
+
...opcodeBase,
|
|
163
|
+
/** 1ms..60s. The video run step produces these from audio durations. */
|
|
164
|
+
durationMs: z.number().int().min(1).max(60_000),
|
|
165
|
+
/**
|
|
166
|
+
* Spoken narration to play during this pause (AUT-57 anchor). Authored by
|
|
167
|
+
* the IDE assistant — it has full project context (real product copy,
|
|
168
|
+
* design system, etc.) and writes natural conversational lines. The
|
|
169
|
+
* run-time TTS step renders the text and rewrites `durationMs` to match the
|
|
170
|
+
* resulting audio length. Required when `stepId` is set (unless `narrationTextByLocale` supplies the line); ignored when
|
|
171
|
+
* `stepId` is absent (decorative pause without narration).
|
|
172
|
+
*/
|
|
173
|
+
narrationText: z.string().min(1).max(2000).optional(),
|
|
174
|
+
narrationTextByLocale: z.record(z.string().min(1).max(16), z.string().min(1).max(2000)).optional(),
|
|
175
|
+
}).strict().superRefine((value, ctx) => {
|
|
176
|
+
const hasLocaleText = value.narrationTextByLocale
|
|
177
|
+
&& Object.values(value.narrationTextByLocale).some((text) => text.trim().length > 0);
|
|
178
|
+
if (value.stepId && (!value.narrationText || !value.narrationText.trim()) && !hasLocaleText) {
|
|
179
|
+
ctx.addIssue({
|
|
180
|
+
code: z.ZodIssueCode.custom,
|
|
181
|
+
path: ['narrationText'],
|
|
182
|
+
message: 'SLEEP anchors with a stepId must carry `narrationText` or `narrationTextByLocale` — the IDE assistant authors the spoken line; AutoKap will TTS it at run time and rewrite durationMs.',
|
|
183
|
+
});
|
|
184
|
+
}
|
|
185
|
+
});
|
|
148
186
|
const storageHintSchema = z.object({
|
|
149
187
|
storage: z.enum(['localStorage', 'sessionStorage', 'cookie']),
|
|
150
188
|
key: z.string().min(1),
|
|
@@ -392,6 +430,7 @@ export const ExecutionOpcodeSchema = z.discriminatedUnion('kind', [
|
|
|
392
430
|
TypeOpcodeSchema,
|
|
393
431
|
PressKeyOpcodeSchema,
|
|
394
432
|
WaitForOpcodeSchema,
|
|
433
|
+
SleepOpcodeSchema,
|
|
395
434
|
SetLocaleOpcodeSchema,
|
|
396
435
|
SetThemeOpcodeSchema,
|
|
397
436
|
ScrollOpcodeSchema,
|
|
@@ -526,22 +565,48 @@ export const PreconditionSpecSchema = z.object({
|
|
|
526
565
|
cookies: z.array(cookieSchema).optional(),
|
|
527
566
|
}).strict();
|
|
528
567
|
// ── Artifact spec ───────────────────────────────────────────────────
|
|
568
|
+
const ResolutionSchema = z.object({
|
|
569
|
+
width: z.number().int().positive(),
|
|
570
|
+
height: z.number().int().positive(),
|
|
571
|
+
}).strict();
|
|
572
|
+
const DEFAULT_VIDEO_DELIVERY_RESOLUTION = { width: 1920, height: 1080 };
|
|
573
|
+
const LEGACY_VIDEO_CAPTURE_RESOLUTION = { width: 2560, height: 1440 };
|
|
574
|
+
function resolutionEquals(a, b) {
|
|
575
|
+
return a.width === b.width && a.height === b.height;
|
|
576
|
+
}
|
|
529
577
|
export const ArtifactSpecSchema = z.object({
|
|
530
|
-
mediaMode: z.enum(['screenshot', 'clip']),
|
|
578
|
+
mediaMode: z.enum(['screenshot', 'clip', 'video']),
|
|
531
579
|
format: z.object({
|
|
532
580
|
clipFormat: z.enum(['gif', 'mp4', 'both']).optional(),
|
|
533
581
|
screenshotFormat: z.enum(['png', 'jpeg']).optional(),
|
|
582
|
+
captureResolution: ResolutionSchema.optional(),
|
|
583
|
+
captureFps: z.number().int().min(1).max(60).optional(),
|
|
584
|
+
deliveryResolution: ResolutionSchema.optional(),
|
|
534
585
|
}).strict().optional(),
|
|
535
586
|
cursorTheme: z.enum(['minimal', 'macos', 'windows']).optional(),
|
|
536
587
|
maxClipDurationSec: z.number().positive().optional(),
|
|
537
588
|
applyMockup: z.boolean().optional(),
|
|
538
589
|
applyStatusBar: z.boolean().optional(),
|
|
539
|
-
}).strict()
|
|
590
|
+
}).strict().superRefine((value, ctx) => {
|
|
591
|
+
if (value.mediaMode === 'video') {
|
|
592
|
+
const res = value.format?.captureResolution;
|
|
593
|
+
const matchesDefault = res ? resolutionEquals(res, DEFAULT_VIDEO_DELIVERY_RESOLUTION) : false;
|
|
594
|
+
const matchesLegacy = res ? resolutionEquals(res, LEGACY_VIDEO_CAPTURE_RESOLUTION) : false;
|
|
595
|
+
if (!res || (!matchesDefault && !matchesLegacy)) {
|
|
596
|
+
ctx.addIssue({
|
|
597
|
+
code: z.ZodIssueCode.custom,
|
|
598
|
+
path: ['format', 'captureResolution'],
|
|
599
|
+
message: "mediaMode='video' requires format.captureResolution = { width: 1920, height: 1080 }; " +
|
|
600
|
+
'legacy 2560x1440 programs are accepted and normalized at runtime',
|
|
601
|
+
});
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
});
|
|
540
605
|
// ── Full program ────────────────────────────────────────────────────
|
|
541
606
|
export const ExecutionProgramSchema = z.object({
|
|
542
607
|
presetId: z.string().min(1),
|
|
543
608
|
programVersion: z.number().int().positive(),
|
|
544
|
-
mediaMode: z.enum(['screenshot', 'clip']),
|
|
609
|
+
mediaMode: z.enum(['screenshot', 'clip', 'video']),
|
|
545
610
|
baseUrl: StrictUrlSchema,
|
|
546
611
|
maxParallelCaptures: z.number().int().positive().optional(),
|
|
547
612
|
variants: z.array(VariantSpecSchema).min(1),
|
|
@@ -554,7 +619,15 @@ export const ExecutionProgramSchema = z.object({
|
|
|
554
619
|
compiledAt: z.string().datetime(),
|
|
555
620
|
compiledWith: z.string().optional(),
|
|
556
621
|
mockDataGroups: z.array(MockDataGroupSchema).optional(),
|
|
557
|
-
}).strict()
|
|
622
|
+
}).strict().superRefine((value, ctx) => {
|
|
623
|
+
if (value.mediaMode !== value.artifactPlan.mediaMode) {
|
|
624
|
+
ctx.addIssue({
|
|
625
|
+
code: z.ZodIssueCode.custom,
|
|
626
|
+
path: ['artifactPlan', 'mediaMode'],
|
|
627
|
+
message: `artifactPlan.mediaMode (${value.artifactPlan.mediaMode}) must match top-level mediaMode (${value.mediaMode})`,
|
|
628
|
+
});
|
|
629
|
+
}
|
|
630
|
+
});
|
|
558
631
|
// ── Healer patch ────────────────────────────────────────────────────
|
|
559
632
|
export const HealerPatchSchema = z.object({
|
|
560
633
|
opcodeIndex: z.number().int().min(0),
|
|
@@ -8,7 +8,7 @@ import type { AKTree, BrowserStorageState, BrowserSessionStorageState, VideoCurs
|
|
|
8
8
|
import type { MockupOptions } from './mockup.js';
|
|
9
9
|
/** Sentinel value that resolves to the current variant's locale or theme at runtime */
|
|
10
10
|
export declare const VARIANT_PLACEHOLDER: "$variant";
|
|
11
|
-
export declare const OPCODE_KINDS: readonly ["NAVIGATE", "DISMISS_OVERLAYS", "ASSERT_ROUTE", "ASSERT_SURFACE", "CLICK", "TYPE", "PRESS_KEY", "WAIT_FOR", "SET_LOCALE", "SET_THEME", "SCROLL", "CAPTURE_SCREENSHOT", "BEGIN_CLIP", "END_CLIP", "HOVER", "SELECT_OPTION", "CHECK", "DOUBLE_CLICK", "DRAG", "CLONE_ELEMENT", "INJECT_MOCK_DATA", "REMOVE_ELEMENT", "SET_ATTRIBUTE"];
|
|
11
|
+
export declare const OPCODE_KINDS: readonly ["NAVIGATE", "DISMISS_OVERLAYS", "ASSERT_ROUTE", "ASSERT_SURFACE", "CLICK", "TYPE", "PRESS_KEY", "WAIT_FOR", "SLEEP", "SET_LOCALE", "SET_THEME", "SCROLL", "CAPTURE_SCREENSHOT", "BEGIN_CLIP", "END_CLIP", "HOVER", "SELECT_OPTION", "CHECK", "DOUBLE_CLICK", "DRAG", "CLONE_ELEMENT", "INJECT_MOCK_DATA", "REMOVE_ELEMENT", "SET_ATTRIBUTE"];
|
|
12
12
|
export type OpcodeKind = (typeof OPCODE_KINDS)[number];
|
|
13
13
|
/**
|
|
14
14
|
* Soft opcodes are non-blocking — if their action or postcondition fails at
|
|
@@ -109,6 +109,13 @@ interface OpcodeBase {
|
|
|
109
109
|
timeoutMs: number;
|
|
110
110
|
/** Max recovery attempts before this opcode is considered failed. Default: 3 */
|
|
111
111
|
maxFailures: number;
|
|
112
|
+
/**
|
|
113
|
+
* Stable identifier used to anchor narration overlays in `mediaMode='video'`
|
|
114
|
+
* runs. Kebab-case, 1-64 chars. Optional — only required for opcodes whose
|
|
115
|
+
* narration segment must survive program rewrites (e.g. WAIT_FOR insertions
|
|
116
|
+
* before run-time speech generation). Ignored in `screenshot` and `clip` modes.
|
|
117
|
+
*/
|
|
118
|
+
stepId?: string;
|
|
112
119
|
}
|
|
113
120
|
export interface NavigateOpcode extends OpcodeBase {
|
|
114
121
|
kind: 'NAVIGATE';
|
|
@@ -170,6 +177,32 @@ export interface WaitForOpcode extends OpcodeBase {
|
|
|
170
177
|
/** 'visible' = element visible in viewport, 'attached' = exists in DOM. Default: 'visible' */
|
|
171
178
|
state: 'visible' | 'attached';
|
|
172
179
|
}
|
|
180
|
+
/**
|
|
181
|
+
* Pause execution for a fixed duration. Used by the video run-time TTS step to
|
|
182
|
+
* align visible actions with the narration audio window. Postcondition is
|
|
183
|
+
* implicitly `always` — the runtime sleeps then advances.
|
|
184
|
+
*/
|
|
185
|
+
export interface SleepOpcode extends OpcodeBase {
|
|
186
|
+
kind: 'SLEEP';
|
|
187
|
+
/** Sleep duration in milliseconds. Capped at 60_000 by the schema. */
|
|
188
|
+
durationMs: number;
|
|
189
|
+
/**
|
|
190
|
+
* Spoken narration to play during this pause (AUT-57 anchor). Authored by
|
|
191
|
+
* the IDE assistant — it has full project context (real product copy,
|
|
192
|
+
* design system, etc.) and writes natural conversational lines. The
|
|
193
|
+
* run-time TTS step renders the text and rewrites `durationMs` to match the
|
|
194
|
+
* resulting audio length. Required when `stepId` is set (unless `narrationTextByLocale` supplies the line); ignored when
|
|
195
|
+
* `stepId` is absent (decorative pause without narration).
|
|
196
|
+
*/
|
|
197
|
+
narrationText?: string;
|
|
198
|
+
/**
|
|
199
|
+
* Locale-keyed spoken narration overrides for multivariant demo videos.
|
|
200
|
+
* Keys are BCP-47-ish locale tags such as `en`, `fr`, `fr-FR`. At prepare
|
|
201
|
+
* time AutoKap first tries the exact current locale, then its primary
|
|
202
|
+
* language subtag. `narrationText` remains the single-locale legacy fallback.
|
|
203
|
+
*/
|
|
204
|
+
narrationTextByLocale?: Record<string, string>;
|
|
205
|
+
}
|
|
173
206
|
export interface SetLocaleOpcode extends OpcodeBase {
|
|
174
207
|
kind: 'SET_LOCALE';
|
|
175
208
|
/** Target BCP-47 locale (e.g. "fr", "en-US") */
|
|
@@ -390,7 +423,7 @@ export interface SetAttributeOpcode extends OpcodeBase {
|
|
|
390
423
|
/** Attribute value */
|
|
391
424
|
value: string;
|
|
392
425
|
}
|
|
393
|
-
export type ExecutionOpcode = NavigateOpcode | DismissOverlaysOpcode | AssertRouteOpcode | AssertSurfaceOpcode | ClickOpcode | TypeOpcode | PressKeyOpcode | WaitForOpcode | SetLocaleOpcode | SetThemeOpcode | ScrollOpcode | CaptureScreenshotOpcode | BeginClipOpcode | EndClipOpcode | HoverOpcode | SelectOptionOpcode | CheckOpcode | DoubleClickOpcode | DragOpcode | CloneElementOpcode | InjectMockDataOpcode | RemoveElementOpcode | SetAttributeOpcode;
|
|
426
|
+
export type ExecutionOpcode = NavigateOpcode | DismissOverlaysOpcode | AssertRouteOpcode | AssertSurfaceOpcode | ClickOpcode | TypeOpcode | PressKeyOpcode | WaitForOpcode | SleepOpcode | SetLocaleOpcode | SetThemeOpcode | ScrollOpcode | CaptureScreenshotOpcode | BeginClipOpcode | EndClipOpcode | HoverOpcode | SelectOptionOpcode | CheckOpcode | DoubleClickOpcode | DragOpcode | CloneElementOpcode | InjectMockDataOpcode | RemoveElementOpcode | SetAttributeOpcode;
|
|
394
427
|
export interface VariantSpec {
|
|
395
428
|
id: string;
|
|
396
429
|
viewport: {
|
|
@@ -452,7 +485,7 @@ export interface PreconditionSpec {
|
|
|
452
485
|
path?: string;
|
|
453
486
|
}>;
|
|
454
487
|
}
|
|
455
|
-
export declare const MEDIA_MODES: readonly ["screenshot", "clip"];
|
|
488
|
+
export declare const MEDIA_MODES: readonly ["screenshot", "clip", "video"];
|
|
456
489
|
export type MediaMode = (typeof MEDIA_MODES)[number];
|
|
457
490
|
export interface ArtifactSpec {
|
|
458
491
|
mediaMode: MediaMode;
|
|
@@ -462,10 +495,34 @@ export interface ArtifactSpec {
|
|
|
462
495
|
clipFormat?: 'gif' | 'mp4' | 'both';
|
|
463
496
|
/** For screenshots: 'png' | 'jpeg'. Default: 'png' */
|
|
464
497
|
screenshotFormat?: 'png' | 'jpeg';
|
|
498
|
+
/**
|
|
499
|
+
* Physical capture surface used while recording. Required when
|
|
500
|
+
* `mediaMode='video'` and fixed at the delivery frame (1920×1080).
|
|
501
|
+
* Legacy 2560×1440 programs are accepted for compatibility and normalized
|
|
502
|
+
* by the CLI before recording.
|
|
503
|
+
*/
|
|
504
|
+
captureResolution?: {
|
|
505
|
+
width: number;
|
|
506
|
+
height: number;
|
|
507
|
+
};
|
|
508
|
+
/**
|
|
509
|
+
* Frame rate used by the capture loop. Defaults to platform-tuned values
|
|
510
|
+
* for `clip` (8 Linux / 15 elsewhere). For `mediaMode='video'` the runtime
|
|
511
|
+
* pins this to 30 fps unless explicitly overridden.
|
|
512
|
+
*/
|
|
513
|
+
captureFps?: number;
|
|
514
|
+
/**
|
|
515
|
+
* Final delivery resolution after compositing. Defaults to 1920×1080 for
|
|
516
|
+
* `mediaMode='video'`.
|
|
517
|
+
*/
|
|
518
|
+
deliveryResolution?: {
|
|
519
|
+
width: number;
|
|
520
|
+
height: number;
|
|
521
|
+
};
|
|
465
522
|
};
|
|
466
523
|
/** Cursor style for clip recordings. Default: 'minimal'. */
|
|
467
524
|
cursorTheme?: VideoCursorTheme;
|
|
468
|
-
/** Max clip duration in seconds. Clips are trimmed if they exceed this. Default: 8. */
|
|
525
|
+
/** Max clip duration in seconds. Clips are trimmed if they exceed this. Default: 8. Ignored when `mediaMode='video'`. */
|
|
469
526
|
maxClipDurationSec?: number;
|
|
470
527
|
/** Whether to apply device frame mockup. Default: false */
|
|
471
528
|
applyMockup?: boolean;
|
|
@@ -613,6 +670,30 @@ export interface RunTelemetry {
|
|
|
613
670
|
/** Per-group mock data application result */
|
|
614
671
|
mockDataGroupResults?: Record<string, 'applied' | 'skipped'>;
|
|
615
672
|
}
|
|
673
|
+
/**
|
|
674
|
+
* Per-opcode timing entry emitted when `mediaMode='video'`. Consumed by the
|
|
675
|
+
* compositor and telemetry tooling to align narration overlays and retain
|
|
676
|
+
* interaction metadata. `bbox` is captured BEFORE the action runs (the
|
|
677
|
+
* targeted element may move or disappear after).
|
|
678
|
+
*/
|
|
679
|
+
export interface OpcodeTiming {
|
|
680
|
+
stepIndex: number;
|
|
681
|
+
stepId?: string;
|
|
682
|
+
opcodeKind: OpcodeKind;
|
|
683
|
+
variantId: string;
|
|
684
|
+
/** clipId of the active BEGIN_CLIP context, if any */
|
|
685
|
+
clipId?: string;
|
|
686
|
+
/** Wall-clock ms relative to the start of the active clip recording */
|
|
687
|
+
timecodeStartMs: number;
|
|
688
|
+
timecodeEndMs: number;
|
|
689
|
+
/** Bounding box of the targeted element at action time (page coords). null if no DOM target. */
|
|
690
|
+
bbox?: {
|
|
691
|
+
x: number;
|
|
692
|
+
y: number;
|
|
693
|
+
width: number;
|
|
694
|
+
height: number;
|
|
695
|
+
} | null;
|
|
696
|
+
}
|
|
616
697
|
export interface RunResult {
|
|
617
698
|
programId: string;
|
|
618
699
|
success: boolean;
|
|
@@ -622,6 +703,11 @@ export interface RunResult {
|
|
|
622
703
|
healerPatches: HealerPatch[];
|
|
623
704
|
/** Total run duration (ms) */
|
|
624
705
|
totalDurationMs: number;
|
|
706
|
+
/**
|
|
707
|
+
* Per-opcode timing entries — populated when `mediaMode='video'`. Empty
|
|
708
|
+
* array for `screenshot` and `clip` modes.
|
|
709
|
+
*/
|
|
710
|
+
opcodeTimings: OpcodeTiming[];
|
|
625
711
|
error?: string;
|
|
626
712
|
}
|
|
627
713
|
export interface WaitCondition {
|
|
@@ -645,7 +731,17 @@ export interface ClickOptions {
|
|
|
645
731
|
button?: 'left' | 'right' | 'middle';
|
|
646
732
|
}
|
|
647
733
|
export interface RecordingOptions {
|
|
648
|
-
mediaMode: 'clip';
|
|
734
|
+
mediaMode: 'clip' | 'video';
|
|
735
|
+
/**
|
|
736
|
+
* Physical recording surface. Video capture records the delivery surface
|
|
737
|
+
* directly (1920×1080 by default).
|
|
738
|
+
*/
|
|
739
|
+
captureResolution?: {
|
|
740
|
+
width: number;
|
|
741
|
+
height: number;
|
|
742
|
+
};
|
|
743
|
+
/** Override the capture loop frame rate. Clamped to 1..30 by the loop. */
|
|
744
|
+
captureFps?: number;
|
|
649
745
|
}
|
|
650
746
|
export interface RecordingResult {
|
|
651
747
|
buffer: Buffer;
|
|
@@ -673,6 +769,19 @@ export interface RuntimeAdapter {
|
|
|
673
769
|
takeCleanScreenshot(): Promise<Buffer>;
|
|
674
770
|
beginRecording(options: RecordingOptions): Promise<void>;
|
|
675
771
|
endRecording(): Promise<RecordingResult>;
|
|
772
|
+
/**
|
|
773
|
+
* Page-coord bounding box of the first element matching `selector`. Returns
|
|
774
|
+
* null if the selector does not match a visible element. Used by the
|
|
775
|
+
* `mediaMode='video'` runtime to capture interaction metadata before each
|
|
776
|
+
* visible action. Optional — implementations that cannot resolve a bbox
|
|
777
|
+
* should leave this method off and the runtime will record `bbox: null`.
|
|
778
|
+
*/
|
|
779
|
+
getElementBoundingBox?(selector: string): Promise<{
|
|
780
|
+
x: number;
|
|
781
|
+
y: number;
|
|
782
|
+
width: number;
|
|
783
|
+
height: number;
|
|
784
|
+
} | null>;
|
|
676
785
|
setLocale(locale: string): Promise<void>;
|
|
677
786
|
setColorScheme(scheme: 'light' | 'dark'): Promise<void>;
|
|
678
787
|
reloadPage?(): Promise<void>;
|
package/dist/execution-types.js
CHANGED
|
@@ -16,6 +16,7 @@ export const OPCODE_KINDS = [
|
|
|
16
16
|
'TYPE',
|
|
17
17
|
'PRESS_KEY',
|
|
18
18
|
'WAIT_FOR',
|
|
19
|
+
'SLEEP',
|
|
19
20
|
'SET_LOCALE',
|
|
20
21
|
'SET_THEME',
|
|
21
22
|
'SCROLL',
|
|
@@ -55,7 +56,7 @@ export const DEFAULT_RECOVERY_POLICY = {
|
|
|
55
56
|
allowHealer: false,
|
|
56
57
|
};
|
|
57
58
|
// ── Artifact spec ───────────────────────────────────────────────────
|
|
58
|
-
export const MEDIA_MODES = ['screenshot', 'clip'];
|
|
59
|
+
export const MEDIA_MODES = ['screenshot', 'clip', 'video'];
|
|
59
60
|
export const DEFAULT_CIRCUIT_BREAKER = {
|
|
60
61
|
maxPerOpcode: 3,
|
|
61
62
|
maxPerPage: 5,
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
|
-
export type { ExecutionProgram, ExecutionOpcode, OpcodeKind, SemanticTarget, PostconditionSpec, PostconditionType, RecoveryPolicy, VariantSpec, PreconditionSpec, ArtifactSpec, MediaMode, CircuitBreakerConfig, HealerPatch, OpcodeResult, OpcodeResultStatus, VariantResult, ArtifactResult, RunResult, RunTelemetry, LLMStepUsage, LLMStepType, RuntimeAdapter, ClickOptions, WaitCondition, RecordingOptions, RecordingResult, HoverOpcode, SelectOptionOpcode, CheckOpcode, DoubleClickOpcode, CloneElementOpcode, InjectMockDataOpcode, RemoveElementOpcode, SetAttributeOpcode, MockDataSlot, MockDataRow, MockDataGroup, } from './execution-types.js';
|
|
1
|
+
export type { ExecutionProgram, ExecutionOpcode, OpcodeKind, SemanticTarget, PostconditionSpec, PostconditionType, RecoveryPolicy, VariantSpec, PreconditionSpec, ArtifactSpec, MediaMode, CircuitBreakerConfig, HealerPatch, OpcodeResult, OpcodeResultStatus, VariantResult, ArtifactResult, RunResult, RunTelemetry, LLMStepUsage, LLMStepType, RuntimeAdapter, ClickOptions, WaitCondition, RecordingOptions, RecordingResult, OpcodeTiming, HoverOpcode, SelectOptionOpcode, CheckOpcode, DoubleClickOpcode, CloneElementOpcode, InjectMockDataOpcode, RemoveElementOpcode, SetAttributeOpcode, MockDataSlot, MockDataRow, MockDataGroup, } from './execution-types.js';
|
|
2
2
|
export { DEFAULT_RECOVERY_POLICY, DEFAULT_CIRCUIT_BREAKER, VARIANT_PLACEHOLDER, SOFT_OPCODE_KINDS, isSoftOpcodeKind, } from './execution-types.js';
|
|
3
3
|
export { ExecutionProgramSchema, ExecutionOpcodeSchema, parseProgram, parseOpcode, safeParseProgramResult, MockDataSlotSchema, MockDataRowSchema, MockDataGroupSchema, } from './execution-schema.js';
|
|
4
|
+
export { NarrationSegmentSchema, VideoNarrationOverlaySchema, VideoIngestPayloadSchema, parseVideoIngestPayload, SUPPORTED_TTS_LOCALES, normalizeLocaleTag, primaryLocaleSubtag, resolveSupportedTtsLocale, VIDEO_STATUSES, VIDEO_FLOW_KIND_OPCODE, VIDEO_FLOW_KIND_LEGACY, } from './video-narration-schema.js';
|
|
5
|
+
export type { NarrationSegment, VideoNarrationOverlay, VideoIngestPayload, SupportedTtsLocale, VideoStatus, } from './video-narration-schema.js';
|
|
6
|
+
export { generateTtsChunk, probeAudioDurationMs, ttsTestHooks, TtsError, } from './openrouter-tts.js';
|
|
7
|
+
export type { TtsClientConfig, TtsRequest, TtsResponse, TtsResponseFormat, } from './openrouter-tts.js';
|
|
4
8
|
export { callLLM } from './llm-provider.js';
|
|
5
9
|
export type { LLMProviderConfig, LLMCallResult } from './llm-provider.js';
|
|
6
10
|
export { normalizeAllowedOrigins, normalizeHttpOrigin, signExecutionProgramEnvelope, verifySignedExecutionProgramEnvelope, } from './program-signing.js';
|
package/dist/index.js
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
// Server-side callers must use specific subpaths (ESLint enforces this).
|
|
5
5
|
export { DEFAULT_RECOVERY_POLICY, DEFAULT_CIRCUIT_BREAKER, VARIANT_PLACEHOLDER, SOFT_OPCODE_KINDS, isSoftOpcodeKind, } from './execution-types.js';
|
|
6
6
|
export { ExecutionProgramSchema, ExecutionOpcodeSchema, parseProgram, parseOpcode, safeParseProgramResult, MockDataSlotSchema, MockDataRowSchema, MockDataGroupSchema, } from './execution-schema.js';
|
|
7
|
+
export { NarrationSegmentSchema, VideoNarrationOverlaySchema, VideoIngestPayloadSchema, parseVideoIngestPayload, SUPPORTED_TTS_LOCALES, normalizeLocaleTag, primaryLocaleSubtag, resolveSupportedTtsLocale, VIDEO_STATUSES, VIDEO_FLOW_KIND_OPCODE, VIDEO_FLOW_KIND_LEGACY, } from './video-narration-schema.js';
|
|
8
|
+
export { generateTtsChunk, probeAudioDurationMs, ttsTestHooks, TtsError, } from './openrouter-tts.js';
|
|
7
9
|
// ── LLM-backed helpers (OpenAI, no browser) ─────────────────────────
|
|
8
10
|
export { callLLM } from './llm-provider.js';
|
|
9
11
|
export { normalizeAllowedOrigins, normalizeHttpOrigin, signExecutionProgramEnvelope, verifySignedExecutionProgramEnvelope, } from './program-signing.js';
|
|
@@ -1,10 +1,20 @@
|
|
|
1
1
|
import type { Page } from 'playwright';
|
|
2
2
|
export interface BezierMoveOptions {
|
|
3
|
-
/** Total duration in milliseconds. Default: proportional to distance
|
|
3
|
+
/** Total duration in milliseconds. Default: proportional to distance + pace. */
|
|
4
4
|
durationMs?: number;
|
|
5
|
-
/** Number of intermediate points. Default:
|
|
5
|
+
/** Number of intermediate points. Default: derived from durationMs (one step per 16ms). */
|
|
6
6
|
steps?: number;
|
|
7
|
+
/**
|
|
8
|
+
* Pace profile when `durationMs` is not explicit. AUT-57 video demos use
|
|
9
|
+
* `'natural'` for deliberate, human-readable cursor motion; the default
|
|
10
|
+
* `'fast'` keeps short clip GIFs snappy.
|
|
11
|
+
*
|
|
12
|
+
* Both profiles use the same √distance · factor formula but different
|
|
13
|
+
* (min, max, factor) tuples — see `paceProfile`.
|
|
14
|
+
*/
|
|
15
|
+
pace?: BezierMovePace;
|
|
7
16
|
}
|
|
17
|
+
export type BezierMovePace = 'fast' | 'natural';
|
|
8
18
|
/**
|
|
9
19
|
* Move the mouse from `from` to `to` along a cubic Bezier curve with
|
|
10
20
|
* smooth controlled motion: ease-in-out timing, gentle proportional curves.
|
package/dist/mouse-animation.js
CHANGED
|
@@ -1,3 +1,29 @@
|
|
|
1
|
+
const FAST_PROFILE = {
|
|
2
|
+
minMs: 80,
|
|
3
|
+
maxMs: 700,
|
|
4
|
+
factor: 28,
|
|
5
|
+
hoverPauseMs: { min: 80, max: 160 },
|
|
6
|
+
pressMs: 70,
|
|
7
|
+
hoverDwellMs: { min: 200, max: 400 },
|
|
8
|
+
};
|
|
9
|
+
/**
|
|
10
|
+
* Natural pace — used by `mediaMode='video'` demos. The min floor is high
|
|
11
|
+
* enough that *any* cursor move reads as deliberate (a flash to a 100px-away
|
|
12
|
+
* nav link still takes >500ms), the cap leaves long traverses at ~1.5s
|
|
13
|
+
* (further than that drags), and the post-arrival beats are wider so each
|
|
14
|
+
* click feels like a real person committing to it.
|
|
15
|
+
*/
|
|
16
|
+
const NATURAL_PROFILE = {
|
|
17
|
+
minMs: 600,
|
|
18
|
+
maxMs: 1500,
|
|
19
|
+
factor: 50,
|
|
20
|
+
hoverPauseMs: { min: 200, max: 350 },
|
|
21
|
+
pressMs: 90,
|
|
22
|
+
hoverDwellMs: { min: 400, max: 700 },
|
|
23
|
+
};
|
|
24
|
+
function paceProfile(pace) {
|
|
25
|
+
return pace === 'natural' ? NATURAL_PROFILE : FAST_PROFILE;
|
|
26
|
+
}
|
|
1
27
|
/** Ease-in-out cubic timing function */
|
|
2
28
|
function easeInOut(t) {
|
|
3
29
|
return t < 0.5 ? 4 * t * t * t : 1 - Math.pow(-2 * t + 2, 3) / 2;
|
|
@@ -23,7 +49,9 @@ export async function moveMouse(page, from, to, options = {}) {
|
|
|
23
49
|
const distance = Math.sqrt(dx * dx + dy * dy);
|
|
24
50
|
if (distance < 2)
|
|
25
51
|
return; // Already there
|
|
26
|
-
const
|
|
52
|
+
const profile = paceProfile(options.pace);
|
|
53
|
+
const durationMs = options.durationMs
|
|
54
|
+
?? Math.min(profile.maxMs, Math.max(profile.minMs, Math.sqrt(distance) * profile.factor));
|
|
27
55
|
const steps = options.steps ?? Math.max(4, Math.ceil(durationMs / 16));
|
|
28
56
|
const msPerStep = durationMs / steps;
|
|
29
57
|
// Control points: straight line for short moves, gentle proportional arc for longer ones.
|
|
@@ -74,30 +102,32 @@ export async function moveMouse(page, from, to, options = {}) {
|
|
|
74
102
|
* is performed at `target` without a preceding Bezier move (first action).
|
|
75
103
|
*/
|
|
76
104
|
export async function animatedClick(page, target, fromCurrent, options = {}) {
|
|
105
|
+
const profile = paceProfile(options.pace);
|
|
77
106
|
if (fromCurrent) {
|
|
78
107
|
await moveMouse(page, fromCurrent, target, options);
|
|
79
108
|
}
|
|
80
109
|
else {
|
|
81
110
|
await page.mouse.move(target.x, target.y);
|
|
82
111
|
}
|
|
83
|
-
|
|
84
|
-
await page.waitForTimeout(
|
|
112
|
+
const hoverSpread = profile.hoverPauseMs.max - profile.hoverPauseMs.min;
|
|
113
|
+
await page.waitForTimeout(profile.hoverPauseMs.min + Math.random() * hoverSpread);
|
|
85
114
|
await page.mouse.down();
|
|
86
|
-
await page.waitForTimeout(
|
|
115
|
+
await page.waitForTimeout(profile.pressMs);
|
|
87
116
|
await page.mouse.up();
|
|
88
117
|
}
|
|
89
118
|
/**
|
|
90
119
|
* Move the mouse to `target` (for hover/highlight actions) without clicking.
|
|
91
120
|
*/
|
|
92
121
|
export async function animatedHover(page, target, fromCurrent, options = {}) {
|
|
122
|
+
const profile = paceProfile(options.pace);
|
|
93
123
|
if (fromCurrent) {
|
|
94
124
|
await moveMouse(page, fromCurrent, target, options);
|
|
95
125
|
}
|
|
96
126
|
else {
|
|
97
127
|
await page.mouse.move(target.x, target.y);
|
|
98
128
|
}
|
|
99
|
-
|
|
100
|
-
await page.waitForTimeout(
|
|
129
|
+
const dwellSpread = profile.hoverDwellMs.max - profile.hoverDwellMs.min;
|
|
130
|
+
await page.waitForTimeout(profile.hoverDwellMs.min + Math.random() * dwellSpread);
|
|
101
131
|
}
|
|
102
132
|
/**
|
|
103
133
|
* Type text into the currently focused element at a human-like typing speed.
|
package/dist/opcode-actions.d.ts
CHANGED
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
import type { ExecutionOpcode, MockDataGroup, RuntimeAdapter, VariantSpec } from './execution-types.js';
|
|
8
8
|
export interface OpcodeActionContext {
|
|
9
9
|
currentVariant?: VariantSpec;
|
|
10
|
+
/** Disallow opcodes that would reload the page while a clip is being recorded. */
|
|
11
|
+
suppressPageReloads?: boolean;
|
|
10
12
|
/** Mock data groups available to INJECT_MOCK_DATA. Plumbed from ExecutionProgram. */
|
|
11
13
|
mockDataGroups?: MockDataGroup[];
|
|
12
14
|
/**
|
package/dist/opcode-actions.js
CHANGED
|
@@ -121,10 +121,19 @@ export async function executeOpcodeCoreAction(opcode, adapter, context = {}) {
|
|
|
121
121
|
}
|
|
122
122
|
break;
|
|
123
123
|
}
|
|
124
|
+
case 'SLEEP': {
|
|
125
|
+
// Hold execution for the configured duration. Demo video runs rewrite
|
|
126
|
+
// narration anchors before execution so visible actions align with
|
|
127
|
+
// audio windows. The opcode `timeoutMs` budget is independent — schema
|
|
128
|
+
// caps durationMs at 60s and the runner gives us a slightly larger
|
|
129
|
+
// budget so the await doesn't trip the per-opcode deadline.
|
|
130
|
+
await new Promise((resolve) => setTimeout(resolve, opcode.durationMs));
|
|
131
|
+
break;
|
|
132
|
+
}
|
|
124
133
|
case 'SET_LOCALE':
|
|
125
|
-
return applyLocaleOpcode(opcode, adapter, context.currentVariant);
|
|
134
|
+
return applyLocaleOpcode(opcode, adapter, context.currentVariant, context.suppressPageReloads);
|
|
126
135
|
case 'SET_THEME':
|
|
127
|
-
return applyThemeOpcode(opcode, adapter, context.currentVariant);
|
|
136
|
+
return applyThemeOpcode(opcode, adapter, context.currentVariant, context.suppressPageReloads);
|
|
128
137
|
case 'SCROLL':
|
|
129
138
|
if (opcode.targetSelector) {
|
|
130
139
|
await adapter.scrollIntoView(opcode.targetSelector);
|
|
@@ -252,16 +261,25 @@ function resolveStorageHintValues(hints, replacement, fieldName) {
|
|
|
252
261
|
}
|
|
253
262
|
return { resolvedHints };
|
|
254
263
|
}
|
|
255
|
-
async function applyLocaleOpcode(opcode, adapter, currentVariant) {
|
|
264
|
+
async function applyLocaleOpcode(opcode, adapter, currentVariant, suppressPageReloads = false) {
|
|
256
265
|
const localeResult = resolveVariantPlaceholder(opcode.locale, currentVariant?.locale, 'SET_LOCALE.locale');
|
|
257
266
|
if ('error' in localeResult)
|
|
258
267
|
return { success: false, error: localeResult.error };
|
|
259
268
|
const resolvedLocale = localeResult.resolved;
|
|
260
269
|
if (opcode.method === 'browser_context') {
|
|
261
|
-
await adapter.setLocale(resolvedLocale);
|
|
262
270
|
if (adapter.reloadPage) {
|
|
271
|
+
if (suppressPageReloads) {
|
|
272
|
+
return {
|
|
273
|
+
success: false,
|
|
274
|
+
error: 'SET_LOCALE browser_context would reload during active recording; move it before BEGIN_CLIP',
|
|
275
|
+
};
|
|
276
|
+
}
|
|
277
|
+
await adapter.setLocale(resolvedLocale);
|
|
263
278
|
await adapter.reloadPage();
|
|
264
279
|
}
|
|
280
|
+
else {
|
|
281
|
+
await adapter.setLocale(resolvedLocale);
|
|
282
|
+
}
|
|
265
283
|
return { success: true };
|
|
266
284
|
}
|
|
267
285
|
if (opcode.method === 'ui_interaction') {
|
|
@@ -278,6 +296,14 @@ async function applyLocaleOpcode(opcode, adapter, currentVariant) {
|
|
|
278
296
|
if ('error' in resolvedHints) {
|
|
279
297
|
return { success: false, error: resolvedHints.error };
|
|
280
298
|
}
|
|
299
|
+
if (adapter.reloadPage) {
|
|
300
|
+
if (suppressPageReloads) {
|
|
301
|
+
return {
|
|
302
|
+
success: false,
|
|
303
|
+
error: 'SET_LOCALE storage mode would reload during active recording; move it before BEGIN_CLIP',
|
|
304
|
+
};
|
|
305
|
+
}
|
|
306
|
+
}
|
|
281
307
|
const writes = await Promise.all(resolvedHints.resolvedHints.map((hint) => adapter.writeStorageHint({
|
|
282
308
|
storage: hint.storage,
|
|
283
309
|
key: hint.key,
|
|
@@ -482,7 +508,7 @@ async function applyInjectMockDataTriggerStrategy(opcode, adapter, group) {
|
|
|
482
508
|
});
|
|
483
509
|
return { success: true };
|
|
484
510
|
}
|
|
485
|
-
async function applyThemeOpcode(opcode, adapter, currentVariant) {
|
|
511
|
+
async function applyThemeOpcode(opcode, adapter, currentVariant, suppressPageReloads = false) {
|
|
486
512
|
const themeResult = resolveVariantPlaceholder(opcode.theme, currentVariant?.theme, 'SET_THEME.theme');
|
|
487
513
|
if ('error' in themeResult)
|
|
488
514
|
return { success: false, error: themeResult.error };
|
|
@@ -505,6 +531,14 @@ async function applyThemeOpcode(opcode, adapter, currentVariant) {
|
|
|
505
531
|
if ('error' in resolvedHints) {
|
|
506
532
|
return { success: false, error: resolvedHints.error };
|
|
507
533
|
}
|
|
534
|
+
if (adapter.reloadPage) {
|
|
535
|
+
if (suppressPageReloads) {
|
|
536
|
+
return {
|
|
537
|
+
success: false,
|
|
538
|
+
error: 'SET_THEME storage mode would reload during active recording; move it before BEGIN_CLIP',
|
|
539
|
+
};
|
|
540
|
+
}
|
|
541
|
+
}
|
|
508
542
|
const writes = await Promise.all(resolvedHints.resolvedHints.map((hint) => adapter.writeStorageHint({
|
|
509
543
|
storage: hint.storage,
|
|
510
544
|
key: hint.key,
|
package/dist/opcode-runner.d.ts
CHANGED
|
@@ -21,6 +21,8 @@ export interface RecoveryAttemptOptions {
|
|
|
21
21
|
remainingTimeMs?: number;
|
|
22
22
|
maxDeterministicRetries?: number;
|
|
23
23
|
currentVariant?: VariantSpec;
|
|
24
|
+
allowPageReload?: boolean;
|
|
25
|
+
suppressPageReloads?: boolean;
|
|
24
26
|
}
|
|
25
27
|
/** Default recovery chain for consumers that do not inject one. */
|
|
26
28
|
export declare class NoOpRecoveryChain implements RecoveryChain {
|