autokap 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,12 +53,24 @@ export const RecoveryPolicySchema = z.object({
53
53
  allowHealer: z.boolean(),
54
54
  }).strict();
55
55
  // ── Opcode base fields ──────────────────────────────────────────────
56
+ /**
57
+ * Stable identifier used to anchor narration overlays in `mediaMode='video'`
58
+ * runs. Constraints (per AUT-57 R8 mitigation):
59
+ * - kebab-case lowercase ASCII letters, digits and hyphens: `[a-z0-9-]+` (hyphen placement is not checked)
60
+ * - 1..64 characters
61
+ * - optional everywhere; required only on opcodes whose narration must
62
+ * survive program rewrites during run-time speech generation.
63
+ */
64
+ const StepIdSchema = z.string().regex(/^[a-z0-9-]+$/, {
65
+ message: 'stepId must match /^[a-z0-9-]+$/ (lowercase kebab-case)',
66
+ }).min(1).max(64);
56
67
  const opcodeBase = {
57
68
  description: z.string().min(1),
58
69
  postcondition: PostconditionSpecSchema,
59
70
  recovery: RecoveryPolicySchema,
60
71
  timeoutMs: z.number().int().positive(),
61
72
  maxFailures: z.number().int().positive(),
73
+ stepId: StepIdSchema.optional(),
62
74
  };
63
75
  /**
64
76
  * Strict URL validator: rejects schemes without a host (e.g. `"https://"`),
@@ -145,6 +157,32 @@ const WaitForOpcodeSchema = z.object({
145
157
  });
146
158
  }
147
159
  });
160
+ const SleepOpcodeSchema = z.object({
161
+ kind: z.literal('SLEEP'),
162
+ ...opcodeBase,
163
+ /** 1ms..60s. The video run step produces these from audio durations. */
164
+ durationMs: z.number().int().min(1).max(60_000),
165
+ /**
166
+ * Spoken narration to play during this pause (AUT-57 anchor). Authored by
167
+ * the IDE assistant — it has full project context (real product copy,
168
+ * design system, etc.) and writes natural conversational lines. The
169
+ * run-time TTS step renders the text and rewrites `durationMs` to match the
170
+ * resulting audio length. Required when `stepId` is set (unless `narrationTextByLocale` supplies the text); ignored when
171
+ * `stepId` is absent (decorative pause without narration).
172
+ */
173
+ narrationText: z.string().min(1).max(2000).optional(),
174
+ narrationTextByLocale: z.record(z.string().min(1).max(16), z.string().min(1).max(2000)).optional(),
175
+ }).strict().superRefine((value, ctx) => {
176
+ const hasLocaleText = value.narrationTextByLocale
177
+ && Object.values(value.narrationTextByLocale).some((text) => text.trim().length > 0);
178
+ if (value.stepId && (!value.narrationText || !value.narrationText.trim()) && !hasLocaleText) {
179
+ ctx.addIssue({
180
+ code: z.ZodIssueCode.custom,
181
+ path: ['narrationText'],
182
+ message: 'SLEEP anchors with a stepId must carry `narrationText` or `narrationTextByLocale` — the IDE assistant authors the spoken line; AutoKap will TTS it at run time and rewrite durationMs.',
183
+ });
184
+ }
185
+ });
148
186
  const storageHintSchema = z.object({
149
187
  storage: z.enum(['localStorage', 'sessionStorage', 'cookie']),
150
188
  key: z.string().min(1),
@@ -392,6 +430,7 @@ export const ExecutionOpcodeSchema = z.discriminatedUnion('kind', [
392
430
  TypeOpcodeSchema,
393
431
  PressKeyOpcodeSchema,
394
432
  WaitForOpcodeSchema,
433
+ SleepOpcodeSchema,
395
434
  SetLocaleOpcodeSchema,
396
435
  SetThemeOpcodeSchema,
397
436
  ScrollOpcodeSchema,
@@ -526,22 +565,48 @@ export const PreconditionSpecSchema = z.object({
526
565
  cookies: z.array(cookieSchema).optional(),
527
566
  }).strict();
528
567
  // ── Artifact spec ───────────────────────────────────────────────────
568
+ const ResolutionSchema = z.object({
569
+ width: z.number().int().positive(),
570
+ height: z.number().int().positive(),
571
+ }).strict();
572
+ const DEFAULT_VIDEO_DELIVERY_RESOLUTION = { width: 1920, height: 1080 };
573
+ const LEGACY_VIDEO_CAPTURE_RESOLUTION = { width: 2560, height: 1440 };
574
+ function resolutionEquals(a, b) {
575
+ return a.width === b.width && a.height === b.height;
576
+ }
529
577
  export const ArtifactSpecSchema = z.object({
530
- mediaMode: z.enum(['screenshot', 'clip']),
578
+ mediaMode: z.enum(['screenshot', 'clip', 'video']),
531
579
  format: z.object({
532
580
  clipFormat: z.enum(['gif', 'mp4', 'both']).optional(),
533
581
  screenshotFormat: z.enum(['png', 'jpeg']).optional(),
582
+ captureResolution: ResolutionSchema.optional(),
583
+ captureFps: z.number().int().min(1).max(60).optional(),
584
+ deliveryResolution: ResolutionSchema.optional(),
534
585
  }).strict().optional(),
535
586
  cursorTheme: z.enum(['minimal', 'macos', 'windows']).optional(),
536
587
  maxClipDurationSec: z.number().positive().optional(),
537
588
  applyMockup: z.boolean().optional(),
538
589
  applyStatusBar: z.boolean().optional(),
539
- }).strict();
590
+ }).strict().superRefine((value, ctx) => {
591
+ if (value.mediaMode === 'video') {
592
+ const res = value.format?.captureResolution;
593
+ const matchesDefault = res ? resolutionEquals(res, DEFAULT_VIDEO_DELIVERY_RESOLUTION) : false;
594
+ const matchesLegacy = res ? resolutionEquals(res, LEGACY_VIDEO_CAPTURE_RESOLUTION) : false;
595
+ if (!res || (!matchesDefault && !matchesLegacy)) {
596
+ ctx.addIssue({
597
+ code: z.ZodIssueCode.custom,
598
+ path: ['format', 'captureResolution'],
599
+ message: "mediaMode='video' requires format.captureResolution = { width: 1920, height: 1080 }; " +
600
+ 'legacy 2560x1440 programs are accepted and normalized at runtime',
601
+ });
602
+ }
603
+ }
604
+ });
540
605
  // ── Full program ────────────────────────────────────────────────────
541
606
  export const ExecutionProgramSchema = z.object({
542
607
  presetId: z.string().min(1),
543
608
  programVersion: z.number().int().positive(),
544
- mediaMode: z.enum(['screenshot', 'clip']),
609
+ mediaMode: z.enum(['screenshot', 'clip', 'video']),
545
610
  baseUrl: StrictUrlSchema,
546
611
  maxParallelCaptures: z.number().int().positive().optional(),
547
612
  variants: z.array(VariantSpecSchema).min(1),
@@ -554,7 +619,15 @@ export const ExecutionProgramSchema = z.object({
554
619
  compiledAt: z.string().datetime(),
555
620
  compiledWith: z.string().optional(),
556
621
  mockDataGroups: z.array(MockDataGroupSchema).optional(),
557
- }).strict();
622
+ }).strict().superRefine((value, ctx) => {
623
+ if (value.mediaMode !== value.artifactPlan.mediaMode) {
624
+ ctx.addIssue({
625
+ code: z.ZodIssueCode.custom,
626
+ path: ['artifactPlan', 'mediaMode'],
627
+ message: `artifactPlan.mediaMode (${value.artifactPlan.mediaMode}) must match top-level mediaMode (${value.mediaMode})`,
628
+ });
629
+ }
630
+ });
558
631
  // ── Healer patch ────────────────────────────────────────────────────
559
632
  export const HealerPatchSchema = z.object({
560
633
  opcodeIndex: z.number().int().min(0),
@@ -8,7 +8,7 @@ import type { AKTree, BrowserStorageState, BrowserSessionStorageState, VideoCurs
8
8
  import type { MockupOptions } from './mockup.js';
9
9
  /** Sentinel value that resolves to the current variant's locale or theme at runtime */
10
10
  export declare const VARIANT_PLACEHOLDER: "$variant";
11
- export declare const OPCODE_KINDS: readonly ["NAVIGATE", "DISMISS_OVERLAYS", "ASSERT_ROUTE", "ASSERT_SURFACE", "CLICK", "TYPE", "PRESS_KEY", "WAIT_FOR", "SET_LOCALE", "SET_THEME", "SCROLL", "CAPTURE_SCREENSHOT", "BEGIN_CLIP", "END_CLIP", "HOVER", "SELECT_OPTION", "CHECK", "DOUBLE_CLICK", "DRAG", "CLONE_ELEMENT", "INJECT_MOCK_DATA", "REMOVE_ELEMENT", "SET_ATTRIBUTE"];
11
+ export declare const OPCODE_KINDS: readonly ["NAVIGATE", "DISMISS_OVERLAYS", "ASSERT_ROUTE", "ASSERT_SURFACE", "CLICK", "TYPE", "PRESS_KEY", "WAIT_FOR", "SLEEP", "SET_LOCALE", "SET_THEME", "SCROLL", "CAPTURE_SCREENSHOT", "BEGIN_CLIP", "END_CLIP", "HOVER", "SELECT_OPTION", "CHECK", "DOUBLE_CLICK", "DRAG", "CLONE_ELEMENT", "INJECT_MOCK_DATA", "REMOVE_ELEMENT", "SET_ATTRIBUTE"];
12
12
  export type OpcodeKind = (typeof OPCODE_KINDS)[number];
13
13
  /**
14
14
  * Soft opcodes are non-blocking — if their action or postcondition fails at
@@ -109,6 +109,13 @@ interface OpcodeBase {
109
109
  timeoutMs: number;
110
110
  /** Max recovery attempts before this opcode is considered failed. Default: 3 */
111
111
  maxFailures: number;
112
+ /**
113
+ * Stable identifier used to anchor narration overlays in `mediaMode='video'`
114
+ * runs. Kebab-case, 1-64 chars. Optional — only required for opcodes whose
115
+ * narration segment must survive program rewrites (e.g. WAIT_FOR insertions
116
+ * before run-time speech generation). Ignored in `screenshot` and `clip` modes.
117
+ */
118
+ stepId?: string;
112
119
  }
113
120
  export interface NavigateOpcode extends OpcodeBase {
114
121
  kind: 'NAVIGATE';
@@ -170,6 +177,32 @@ export interface WaitForOpcode extends OpcodeBase {
170
177
  /** 'visible' = element visible in viewport, 'attached' = exists in DOM. Default: 'visible' */
171
178
  state: 'visible' | 'attached';
172
179
  }
180
+ /**
181
+ * Pause execution for a fixed duration. Used by the video run-time TTS step to
182
+ * align visible actions with the narration audio window. Postcondition is
183
+ * implicitly `always` — the runtime sleeps then advances.
184
+ */
185
+ export interface SleepOpcode extends OpcodeBase {
186
+ kind: 'SLEEP';
187
+ /** Sleep duration in milliseconds. Integer in 1..60_000 (enforced by the schema). */
188
+ durationMs: number;
189
+ /**
190
+ * Spoken narration to play during this pause (AUT-57 anchor). Authored by
191
+ * the IDE assistant — it has full project context (real product copy,
192
+ * design system, etc.) and writes natural conversational lines. The
193
+ * run-time TTS step renders the text and rewrites `durationMs` to match the
194
+ * resulting audio length. Required when `stepId` is set (unless `narrationTextByLocale` supplies the text); ignored when
195
+ * `stepId` is absent (decorative pause without narration).
196
+ */
197
+ narrationText?: string;
198
+ /**
199
+ * Locale-keyed spoken narration overrides for multivariant demo videos.
200
+ * Keys are BCP-47-style locale tags (1-16 characters) such as `en`, `fr`, `fr-FR`. At prepare
201
+ * time AutoKap first tries the exact current locale, then its primary
202
+ * language subtag. `narrationText` remains the single-locale legacy fallback.
203
+ */
204
+ narrationTextByLocale?: Record<string, string>;
205
+ }
173
206
  export interface SetLocaleOpcode extends OpcodeBase {
174
207
  kind: 'SET_LOCALE';
175
208
  /** Target BCP-47 locale (e.g. "fr", "en-US") */
@@ -390,7 +423,7 @@ export interface SetAttributeOpcode extends OpcodeBase {
390
423
  /** Attribute value */
391
424
  value: string;
392
425
  }
393
- export type ExecutionOpcode = NavigateOpcode | DismissOverlaysOpcode | AssertRouteOpcode | AssertSurfaceOpcode | ClickOpcode | TypeOpcode | PressKeyOpcode | WaitForOpcode | SetLocaleOpcode | SetThemeOpcode | ScrollOpcode | CaptureScreenshotOpcode | BeginClipOpcode | EndClipOpcode | HoverOpcode | SelectOptionOpcode | CheckOpcode | DoubleClickOpcode | DragOpcode | CloneElementOpcode | InjectMockDataOpcode | RemoveElementOpcode | SetAttributeOpcode;
426
+ export type ExecutionOpcode = NavigateOpcode | DismissOverlaysOpcode | AssertRouteOpcode | AssertSurfaceOpcode | ClickOpcode | TypeOpcode | PressKeyOpcode | WaitForOpcode | SleepOpcode | SetLocaleOpcode | SetThemeOpcode | ScrollOpcode | CaptureScreenshotOpcode | BeginClipOpcode | EndClipOpcode | HoverOpcode | SelectOptionOpcode | CheckOpcode | DoubleClickOpcode | DragOpcode | CloneElementOpcode | InjectMockDataOpcode | RemoveElementOpcode | SetAttributeOpcode;
394
427
  export interface VariantSpec {
395
428
  id: string;
396
429
  viewport: {
@@ -452,7 +485,7 @@ export interface PreconditionSpec {
452
485
  path?: string;
453
486
  }>;
454
487
  }
455
- export declare const MEDIA_MODES: readonly ["screenshot", "clip"];
488
+ export declare const MEDIA_MODES: readonly ["screenshot", "clip", "video"];
456
489
  export type MediaMode = (typeof MEDIA_MODES)[number];
457
490
  export interface ArtifactSpec {
458
491
  mediaMode: MediaMode;
@@ -462,10 +495,34 @@ export interface ArtifactSpec {
462
495
  clipFormat?: 'gif' | 'mp4' | 'both';
463
496
  /** For screenshots: 'png' | 'jpeg'. Default: 'png' */
464
497
  screenshotFormat?: 'png' | 'jpeg';
498
+ /**
499
+ * Physical capture surface used while recording. Required when
500
+ * `mediaMode='video'` and fixed at the delivery frame (1920×1080).
501
+ * Legacy 2560×1440 programs are accepted for compatibility and normalized
502
+ * by the CLI before recording.
503
+ */
504
+ captureResolution?: {
505
+ width: number;
506
+ height: number;
507
+ };
508
+ /**
509
+ * Frame rate used by the capture loop. Defaults to platform-tuned values
510
+ * for `clip` (8 Linux / 15 elsewhere). For `mediaMode='video'` the runtime
511
+ * pins this to 30 fps unless explicitly overridden.
512
+ */
513
+ captureFps?: number;
514
+ /**
515
+ * Final delivery resolution after compositing. Defaults to 1920×1080 for
516
+ * `mediaMode='video'`.
517
+ */
518
+ deliveryResolution?: {
519
+ width: number;
520
+ height: number;
521
+ };
465
522
  };
466
523
  /** Cursor style for clip recordings. Default: 'minimal'. */
467
524
  cursorTheme?: VideoCursorTheme;
468
- /** Max clip duration in seconds. Clips are trimmed if they exceed this. Default: 8. */
525
+ /** Max clip duration in seconds. Clips are trimmed if they exceed this. Default: 8. Ignored when `mediaMode='video'`. */
469
526
  maxClipDurationSec?: number;
470
527
  /** Whether to apply device frame mockup. Default: false */
471
528
  applyMockup?: boolean;
@@ -613,6 +670,30 @@ export interface RunTelemetry {
613
670
  /** Per-group mock data application result */
614
671
  mockDataGroupResults?: Record<string, 'applied' | 'skipped'>;
615
672
  }
673
+ /**
674
+ * Per-opcode timing entry emitted when `mediaMode='video'`. Consumed by the
675
+ * compositor and telemetry tooling to align narration overlays and retain
676
+ * interaction metadata. `bbox` is captured BEFORE the action runs (the
677
+ * targeted element may move or disappear after).
678
+ */
679
+ export interface OpcodeTiming {
680
+ stepIndex: number;
681
+ stepId?: string;
682
+ opcodeKind: OpcodeKind;
683
+ variantId: string;
684
+ /** clipId of the active BEGIN_CLIP context, if any */
685
+ clipId?: string;
686
+ /** Wall-clock ms relative to the start of the active clip recording */
687
+ timecodeStartMs: number;
688
+ timecodeEndMs: number;
689
+ /** Bounding box of the targeted element, captured just before the action runs (page coords). null if no DOM target or the adapter cannot resolve one. */
690
+ bbox?: {
691
+ x: number;
692
+ y: number;
693
+ width: number;
694
+ height: number;
695
+ } | null;
696
+ }
616
697
  export interface RunResult {
617
698
  programId: string;
618
699
  success: boolean;
@@ -622,6 +703,11 @@ export interface RunResult {
622
703
  healerPatches: HealerPatch[];
623
704
  /** Total run duration (ms) */
624
705
  totalDurationMs: number;
706
+ /**
707
+ * Per-opcode timing entries — populated when `mediaMode='video'`. Empty
708
+ * array for `screenshot` and `clip` modes.
709
+ */
710
+ opcodeTimings: OpcodeTiming[];
625
711
  error?: string;
626
712
  }
627
713
  export interface WaitCondition {
@@ -645,7 +731,17 @@ export interface ClickOptions {
645
731
  button?: 'left' | 'right' | 'middle';
646
732
  }
647
733
  export interface RecordingOptions {
648
- mediaMode: 'clip';
734
+ mediaMode: 'clip' | 'video';
735
+ /**
736
+ * Physical recording surface. Video capture records the delivery surface
737
+ * directly (1920×1080 by default).
738
+ */
739
+ captureResolution?: {
740
+ width: number;
741
+ height: number;
742
+ };
743
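+ /** Override the capture loop frame rate. Clamped to 1..30 by the loop. NOTE(review): the program schema accepts `format.captureFps` up to 60; presumably values above 30 are reduced here — confirm. */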
744
+ captureFps?: number;
649
745
  }
650
746
  export interface RecordingResult {
651
747
  buffer: Buffer;
@@ -673,6 +769,19 @@ export interface RuntimeAdapter {
673
769
  takeCleanScreenshot(): Promise<Buffer>;
674
770
  beginRecording(options: RecordingOptions): Promise<void>;
675
771
  endRecording(): Promise<RecordingResult>;
772
+ /**
773
+ * Page-coord bounding box of the first element matching `selector`. Returns
774
+ * null if the selector does not match a visible element. Used by the
775
+ * `mediaMode='video'` runtime to capture interaction metadata before each
776
+ * visible action. Optional — implementations that cannot resolve a bbox
777
+ * should leave this method off and the runtime will record `bbox: null`.
778
+ */
779
+ getElementBoundingBox?(selector: string): Promise<{
780
+ x: number;
781
+ y: number;
782
+ width: number;
783
+ height: number;
784
+ } | null>;
676
785
  setLocale(locale: string): Promise<void>;
677
786
  setColorScheme(scheme: 'light' | 'dark'): Promise<void>;
678
787
  reloadPage?(): Promise<void>;
@@ -16,6 +16,7 @@ export const OPCODE_KINDS = [
16
16
  'TYPE',
17
17
  'PRESS_KEY',
18
18
  'WAIT_FOR',
19
+ 'SLEEP',
19
20
  'SET_LOCALE',
20
21
  'SET_THEME',
21
22
  'SCROLL',
@@ -55,7 +56,7 @@ export const DEFAULT_RECOVERY_POLICY = {
55
56
  allowHealer: false,
56
57
  };
57
58
  // ── Artifact spec ───────────────────────────────────────────────────
58
- export const MEDIA_MODES = ['screenshot', 'clip'];
59
+ export const MEDIA_MODES = ['screenshot', 'clip', 'video'];
59
60
  export const DEFAULT_CIRCUIT_BREAKER = {
60
61
  maxPerOpcode: 3,
61
62
  maxPerPage: 5,
package/dist/index.d.ts CHANGED
@@ -1,6 +1,10 @@
1
- export type { ExecutionProgram, ExecutionOpcode, OpcodeKind, SemanticTarget, PostconditionSpec, PostconditionType, RecoveryPolicy, VariantSpec, PreconditionSpec, ArtifactSpec, MediaMode, CircuitBreakerConfig, HealerPatch, OpcodeResult, OpcodeResultStatus, VariantResult, ArtifactResult, RunResult, RunTelemetry, LLMStepUsage, LLMStepType, RuntimeAdapter, ClickOptions, WaitCondition, RecordingOptions, RecordingResult, HoverOpcode, SelectOptionOpcode, CheckOpcode, DoubleClickOpcode, CloneElementOpcode, InjectMockDataOpcode, RemoveElementOpcode, SetAttributeOpcode, MockDataSlot, MockDataRow, MockDataGroup, } from './execution-types.js';
1
+ export type { ExecutionProgram, ExecutionOpcode, OpcodeKind, SemanticTarget, PostconditionSpec, PostconditionType, RecoveryPolicy, VariantSpec, PreconditionSpec, ArtifactSpec, MediaMode, CircuitBreakerConfig, HealerPatch, OpcodeResult, OpcodeResultStatus, VariantResult, ArtifactResult, RunResult, RunTelemetry, LLMStepUsage, LLMStepType, RuntimeAdapter, ClickOptions, WaitCondition, RecordingOptions, RecordingResult, OpcodeTiming, HoverOpcode, SelectOptionOpcode, CheckOpcode, DoubleClickOpcode, CloneElementOpcode, InjectMockDataOpcode, RemoveElementOpcode, SetAttributeOpcode, MockDataSlot, MockDataRow, MockDataGroup, } from './execution-types.js';
2
2
  export { DEFAULT_RECOVERY_POLICY, DEFAULT_CIRCUIT_BREAKER, VARIANT_PLACEHOLDER, SOFT_OPCODE_KINDS, isSoftOpcodeKind, } from './execution-types.js';
3
3
  export { ExecutionProgramSchema, ExecutionOpcodeSchema, parseProgram, parseOpcode, safeParseProgramResult, MockDataSlotSchema, MockDataRowSchema, MockDataGroupSchema, } from './execution-schema.js';
4
+ export { NarrationSegmentSchema, VideoNarrationOverlaySchema, VideoIngestPayloadSchema, parseVideoIngestPayload, SUPPORTED_TTS_LOCALES, normalizeLocaleTag, primaryLocaleSubtag, resolveSupportedTtsLocale, VIDEO_STATUSES, VIDEO_FLOW_KIND_OPCODE, VIDEO_FLOW_KIND_LEGACY, } from './video-narration-schema.js';
5
+ export type { NarrationSegment, VideoNarrationOverlay, VideoIngestPayload, SupportedTtsLocale, VideoStatus, } from './video-narration-schema.js';
6
+ export { generateTtsChunk, probeAudioDurationMs, ttsTestHooks, TtsError, } from './openrouter-tts.js';
7
+ export type { TtsClientConfig, TtsRequest, TtsResponse, TtsResponseFormat, } from './openrouter-tts.js';
4
8
  export { callLLM } from './llm-provider.js';
5
9
  export type { LLMProviderConfig, LLMCallResult } from './llm-provider.js';
6
10
  export { normalizeAllowedOrigins, normalizeHttpOrigin, signExecutionProgramEnvelope, verifySignedExecutionProgramEnvelope, } from './program-signing.js';
package/dist/index.js CHANGED
@@ -4,6 +4,8 @@
4
4
  // Server-side callers must use specific subpaths (ESLint enforces this).
5
5
  export { DEFAULT_RECOVERY_POLICY, DEFAULT_CIRCUIT_BREAKER, VARIANT_PLACEHOLDER, SOFT_OPCODE_KINDS, isSoftOpcodeKind, } from './execution-types.js';
6
6
  export { ExecutionProgramSchema, ExecutionOpcodeSchema, parseProgram, parseOpcode, safeParseProgramResult, MockDataSlotSchema, MockDataRowSchema, MockDataGroupSchema, } from './execution-schema.js';
7
+ export { NarrationSegmentSchema, VideoNarrationOverlaySchema, VideoIngestPayloadSchema, parseVideoIngestPayload, SUPPORTED_TTS_LOCALES, normalizeLocaleTag, primaryLocaleSubtag, resolveSupportedTtsLocale, VIDEO_STATUSES, VIDEO_FLOW_KIND_OPCODE, VIDEO_FLOW_KIND_LEGACY, } from './video-narration-schema.js';
8
+ export { generateTtsChunk, probeAudioDurationMs, ttsTestHooks, TtsError, } from './openrouter-tts.js';
7
9
  // ── LLM-backed helpers (OpenAI, no browser) ─────────────────────────
8
10
  export { callLLM } from './llm-provider.js';
9
11
  export { normalizeAllowedOrigins, normalizeHttpOrigin, signExecutionProgramEnvelope, verifySignedExecutionProgramEnvelope, } from './program-signing.js';
@@ -1,10 +1,20 @@
1
1
  import type { Page } from 'playwright';
2
2
  export interface BezierMoveOptions {
3
- /** Total duration in milliseconds. Default: proportional to distance (300–800ms) */
3
+ /** Total duration in milliseconds. Default: √distance × pace factor, clamped to the pace profile's min/max. */
4
4
  durationMs?: number;
5
- /** Number of intermediate points. Default: 30 */
5
+ /** Number of intermediate points. Default: derived from durationMs (one step per 16ms, minimum 4). */
6
6
  steps?: number;
7
+ /**
8
+ * Pace profile when `durationMs` is not explicit. AUT-57 video demos use
9
+ * `'natural'` for deliberate, human-readable cursor motion; the default
10
+ * `'fast'` keeps short clip GIFs snappy.
11
+ *
12
+ * Both profiles use the same √distance · factor formula but different
13
+ * (min, max, factor) tuples — see `paceProfile`.
14
+ */
15
+ pace?: BezierMovePace;
7
16
  }
17
+ export type BezierMovePace = 'fast' | 'natural';
8
18
  /**
9
19
  * Move the mouse from `from` to `to` along a cubic Bezier curve with
10
20
  * smooth controlled motion: ease-in-out timing, gentle proportional curves.
@@ -1,3 +1,29 @@
1
+ const FAST_PROFILE = {
2
+ minMs: 80,
3
+ maxMs: 700,
4
+ factor: 28,
5
+ hoverPauseMs: { min: 80, max: 160 },
6
+ pressMs: 70,
7
+ hoverDwellMs: { min: 200, max: 400 },
8
+ };
9
+ /**
10
+ * Natural pace — used by `mediaMode='video'` demos. The min floor is high
11
+ * enough that *any* cursor move reads as deliberate (a flash to a 100px-away
12
+ * nav link still takes >500ms), the cap leaves long traverses at ~1.5s
13
+ * (further than that drags), and the post-arrival beats are wider so each
14
+ * click feels like a real person committing to it.
15
+ */
16
+ const NATURAL_PROFILE = {
17
+ minMs: 600,
18
+ maxMs: 1500,
19
+ factor: 50,
20
+ hoverPauseMs: { min: 200, max: 350 },
21
+ pressMs: 90,
22
+ hoverDwellMs: { min: 400, max: 700 },
23
+ };
24
+ function paceProfile(pace) {
25
+ return pace === 'natural' ? NATURAL_PROFILE : FAST_PROFILE;
26
+ }
1
27
  /** Ease-in-out cubic timing function */
2
28
  function easeInOut(t) {
3
29
  return t < 0.5 ? 4 * t * t * t : 1 - Math.pow(-2 * t + 2, 3) / 2;
@@ -23,7 +49,9 @@ export async function moveMouse(page, from, to, options = {}) {
23
49
  const distance = Math.sqrt(dx * dx + dy * dy);
24
50
  if (distance < 2)
25
51
  return; // Already there
26
- const durationMs = options.durationMs ?? Math.min(700, Math.max(80, Math.sqrt(distance) * 28));
52
+ const profile = paceProfile(options.pace);
53
+ const durationMs = options.durationMs
54
+ ?? Math.min(profile.maxMs, Math.max(profile.minMs, Math.sqrt(distance) * profile.factor));
27
55
  const steps = options.steps ?? Math.max(4, Math.ceil(durationMs / 16));
28
56
  const msPerStep = durationMs / steps;
29
57
  // Control points: straight line for short moves, gentle proportional arc for longer ones.
@@ -74,30 +102,32 @@ export async function moveMouse(page, from, to, options = {}) {
74
102
  * is performed at `target` without a preceding Bezier move (first action).
75
103
  */
76
104
  export async function animatedClick(page, target, fromCurrent, options = {}) {
105
+ const profile = paceProfile(options.pace);
77
106
  if (fromCurrent) {
78
107
  await moveMouse(page, fromCurrent, target, options);
79
108
  }
80
109
  else {
81
110
  await page.mouse.move(target.x, target.y);
82
111
  }
83
- // Brief hover pause before clicking (human-like)
84
- await page.waitForTimeout(80 + Math.random() * 80);
112
+ const hoverSpread = profile.hoverPauseMs.max - profile.hoverPauseMs.min;
113
+ await page.waitForTimeout(profile.hoverPauseMs.min + Math.random() * hoverSpread);
85
114
  await page.mouse.down();
86
- await page.waitForTimeout(70);
115
+ await page.waitForTimeout(profile.pressMs);
87
116
  await page.mouse.up();
88
117
  }
89
118
  /**
90
119
  * Move the mouse to `target` (for hover/highlight actions) without clicking.
91
120
  */
92
121
  export async function animatedHover(page, target, fromCurrent, options = {}) {
122
+ const profile = paceProfile(options.pace);
93
123
  if (fromCurrent) {
94
124
  await moveMouse(page, fromCurrent, target, options);
95
125
  }
96
126
  else {
97
127
  await page.mouse.move(target.x, target.y);
98
128
  }
99
- // Brief pause at hover position
100
- await page.waitForTimeout(200 + Math.random() * 200);
129
+ const dwellSpread = profile.hoverDwellMs.max - profile.hoverDwellMs.min;
130
+ await page.waitForTimeout(profile.hoverDwellMs.min + Math.random() * dwellSpread);
101
131
  }
102
132
  /**
103
133
  * Type text into the currently focused element at a human-like typing speed.
@@ -7,6 +7,8 @@
7
7
  import type { ExecutionOpcode, MockDataGroup, RuntimeAdapter, VariantSpec } from './execution-types.js';
8
8
  export interface OpcodeActionContext {
9
9
  currentVariant?: VariantSpec;
10
+ /** Disallow opcodes that would reload the page while a clip is being recorded. */
11
+ suppressPageReloads?: boolean;
10
12
  /** Mock data groups available to INJECT_MOCK_DATA. Plumbed from ExecutionProgram. */
11
13
  mockDataGroups?: MockDataGroup[];
12
14
  /**
@@ -121,10 +121,19 @@ export async function executeOpcodeCoreAction(opcode, adapter, context = {}) {
121
121
  }
122
122
  break;
123
123
  }
124
+ case 'SLEEP': {
125
+ // Hold execution for the configured duration. Demo video runs rewrite
126
+ // narration anchors before execution so visible actions align with
127
+ // audio windows. The opcode `timeoutMs` budget is independent — schema
128
+ // caps durationMs at 60s and the runner gives us a slightly larger
129
+ // budget so the await doesn't trip the per-opcode deadline.
130
+ await new Promise((resolve) => setTimeout(resolve, opcode.durationMs));
131
+ break;
132
+ }
124
133
  case 'SET_LOCALE':
125
- return applyLocaleOpcode(opcode, adapter, context.currentVariant);
134
+ return applyLocaleOpcode(opcode, adapter, context.currentVariant, context.suppressPageReloads);
126
135
  case 'SET_THEME':
127
- return applyThemeOpcode(opcode, adapter, context.currentVariant);
136
+ return applyThemeOpcode(opcode, adapter, context.currentVariant, context.suppressPageReloads);
128
137
  case 'SCROLL':
129
138
  if (opcode.targetSelector) {
130
139
  await adapter.scrollIntoView(opcode.targetSelector);
@@ -252,16 +261,25 @@ function resolveStorageHintValues(hints, replacement, fieldName) {
252
261
  }
253
262
  return { resolvedHints };
254
263
  }
255
- async function applyLocaleOpcode(opcode, adapter, currentVariant) {
264
+ async function applyLocaleOpcode(opcode, adapter, currentVariant, suppressPageReloads = false) {
256
265
  const localeResult = resolveVariantPlaceholder(opcode.locale, currentVariant?.locale, 'SET_LOCALE.locale');
257
266
  if ('error' in localeResult)
258
267
  return { success: false, error: localeResult.error };
259
268
  const resolvedLocale = localeResult.resolved;
260
269
  if (opcode.method === 'browser_context') {
261
- await adapter.setLocale(resolvedLocale);
262
270
  if (adapter.reloadPage) {
271
+ if (suppressPageReloads) {
272
+ return {
273
+ success: false,
274
+ error: 'SET_LOCALE browser_context would reload during active recording; move it before BEGIN_CLIP',
275
+ };
276
+ }
277
+ await adapter.setLocale(resolvedLocale);
263
278
  await adapter.reloadPage();
264
279
  }
280
+ else {
281
+ await adapter.setLocale(resolvedLocale);
282
+ }
265
283
  return { success: true };
266
284
  }
267
285
  if (opcode.method === 'ui_interaction') {
@@ -278,6 +296,14 @@ async function applyLocaleOpcode(opcode, adapter, currentVariant) {
278
296
  if ('error' in resolvedHints) {
279
297
  return { success: false, error: resolvedHints.error };
280
298
  }
299
+ if (adapter.reloadPage) {
300
+ if (suppressPageReloads) {
301
+ return {
302
+ success: false,
303
+ error: 'SET_LOCALE storage mode would reload during active recording; move it before BEGIN_CLIP',
304
+ };
305
+ }
306
+ }
281
307
  const writes = await Promise.all(resolvedHints.resolvedHints.map((hint) => adapter.writeStorageHint({
282
308
  storage: hint.storage,
283
309
  key: hint.key,
@@ -482,7 +508,7 @@ async function applyInjectMockDataTriggerStrategy(opcode, adapter, group) {
482
508
  });
483
509
  return { success: true };
484
510
  }
485
- async function applyThemeOpcode(opcode, adapter, currentVariant) {
511
+ async function applyThemeOpcode(opcode, adapter, currentVariant, suppressPageReloads = false) {
486
512
  const themeResult = resolveVariantPlaceholder(opcode.theme, currentVariant?.theme, 'SET_THEME.theme');
487
513
  if ('error' in themeResult)
488
514
  return { success: false, error: themeResult.error };
@@ -505,6 +531,14 @@ async function applyThemeOpcode(opcode, adapter, currentVariant) {
505
531
  if ('error' in resolvedHints) {
506
532
  return { success: false, error: resolvedHints.error };
507
533
  }
534
+ if (adapter.reloadPage) {
535
+ if (suppressPageReloads) {
536
+ return {
537
+ success: false,
538
+ error: 'SET_THEME storage mode would reload during active recording; move it before BEGIN_CLIP',
539
+ };
540
+ }
541
+ }
508
542
  const writes = await Promise.all(resolvedHints.resolvedHints.map((hint) => adapter.writeStorageHint({
509
543
  storage: hint.storage,
510
544
  key: hint.key,
@@ -21,6 +21,8 @@ export interface RecoveryAttemptOptions {
21
21
  remainingTimeMs?: number;
22
22
  maxDeterministicRetries?: number;
23
23
  currentVariant?: VariantSpec;
24
+ allowPageReload?: boolean;
25
+ suppressPageReloads?: boolean;
24
26
  }
25
27
  /** Default recovery chain for consumers that do not inject one. */
26
28
  export declare class NoOpRecoveryChain implements RecoveryChain {