autokap 1.8.6 → 1.8.7

This diff compares the contents of two publicly released versions of a package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
@@ -4,6 +4,7 @@
4
4
  * Validates ExecutionProgram at compile output (server) and CLI input boundaries.
5
5
  */
6
6
  import { z } from 'zod';
7
+ import { upgradeProgram } from './program-migrations.js';
7
8
  // ── Postcondition ───────────────────────────────────────────────────
8
9
  export const PostconditionSpecSchema = z.object({
9
10
  type: z.enum([
@@ -596,6 +597,8 @@ export const PreconditionSpecSchema = z.object({
596
597
  storageState: StorageStateSchema.optional(),
597
598
  sessionStorage: z.record(z.string(), z.record(z.string(), z.string())).optional(),
598
599
  cookies: z.array(cookieSchema).optional(),
600
+ // AUT-239: active scenario id; runner injects a signed __ak_scenario cookie.
601
+ scenario: z.string().min(1).optional(),
599
602
  }).strict();
600
603
  // ── Artifact spec ───────────────────────────────────────────────────
601
604
  const ResolutionSchema = z.object({
@@ -603,7 +606,6 @@ const ResolutionSchema = z.object({
603
606
  height: z.number().int().positive(),
604
607
  }).strict();
605
608
  const DEFAULT_VIDEO_DELIVERY_RESOLUTION = { width: 1920, height: 1080 };
606
- const LEGACY_VIDEO_CAPTURE_RESOLUTION = { width: 2560, height: 1440 };
607
609
  function resolutionEquals(a, b) {
608
610
  return a.width === b.width && a.height === b.height;
609
611
  }
@@ -623,14 +625,14 @@ export const ArtifactSpecSchema = z.object({
623
625
  }).strict().superRefine((value, ctx) => {
624
626
  if (value.mediaMode === 'video') {
625
627
  const res = value.format?.captureResolution;
626
- const matchesDefault = res ? resolutionEquals(res, DEFAULT_VIDEO_DELIVERY_RESOLUTION) : false;
627
- const matchesLegacy = res ? resolutionEquals(res, LEGACY_VIDEO_CAPTURE_RESOLUTION) : false;
628
- if (!res || (!matchesDefault && !matchesLegacy)) {
628
+ // Legacy 2560×1440 programs are normalized to 1920×1080 by migrate-on-read
629
+ // (program-migrations.ts, migrate_0→1) BEFORE this schema runs, so only the
630
+ // canonical resolution reaches validation here.
631
+ if (!res || !resolutionEquals(res, DEFAULT_VIDEO_DELIVERY_RESOLUTION)) {
629
632
  ctx.addIssue({
630
633
  code: z.ZodIssueCode.custom,
631
634
  path: ['format', 'captureResolution'],
632
- message: "mediaMode='video' requires format.captureResolution = { width: 1920, height: 1080 }; " +
633
- 'legacy 2560x1440 programs are accepted and normalized at runtime',
635
+ message: "mediaMode='video' requires format.captureResolution = { width: 1920, height: 1080 }",
634
636
  });
635
637
  }
636
638
  }
@@ -638,7 +640,17 @@ export const ArtifactSpecSchema = z.object({
638
640
  // ── Full program ────────────────────────────────────────────────────
639
641
  export const ExecutionProgramSchema = z.object({
640
642
  presetId: z.string().min(1),
643
+ // Content-revision counter bumped by the healer; orthogonal to programSchemaVersion (form).
641
644
  programVersion: z.number().int().positive(),
645
+ // FORM version. migrate-on-read (upgradeProgram, run inside parseProgram)
646
+ // stamps this to the current value before validation. Intentionally optional
647
+ // and WITHOUT a Zod default: this schema is reused inside signature
648
+ // verification (program-signing.ts), where injecting a default would mutate
649
+ // the signed payload and break signature symmetry for programs signed without
650
+ // the field. Presence at runtime is guaranteed by upgradeProgram, not here.
651
+ programSchemaVersion: z.number().int().positive().optional(),
652
+ // Provenance: engine semantics the generator targeted. Informational only.
653
+ engineVersion: z.number().int().positive().optional(),
642
654
  mediaMode: z.enum(['screenshot', 'clip', 'video']),
643
655
  baseUrl: StrictUrlSchema,
644
656
  maxParallelCaptures: z.number().int().positive().optional(),
@@ -685,12 +697,51 @@ export const HealerPatchSchema = z.object({
685
697
  }).strict();
686
698
  // ── Typed parse helpers ─────────────────────────────────────────────
687
699
  export function parseProgram(data) {
688
- return ExecutionProgramSchema.parse(data);
700
+ // migrate-on-read: bring any stored FORM up to the current shape before the
701
+ // strict schema runs, so legacy presets parse instead of being rejected en bloc.
702
+ return ExecutionProgramSchema.parse(upgradeProgram(data));
689
703
  }
690
704
  export function parseOpcode(data) {
691
705
  return ExecutionOpcodeSchema.parse(data);
692
706
  }
693
707
  export function safeParseProgramResult(data) {
694
- return ExecutionProgramSchema.safeParse(data);
708
+ return ExecutionProgramSchema.safeParse(upgradeProgram(data));
709
+ }
710
+ /**
711
+ * Returns every NAVIGATE opcode located between a BEGIN_CLIP and its END_CLIP.
712
+ * Pure and mode-agnostic — mirrors the runtime clip window exactly.
713
+ */
714
+ export function findNavigateInClipViolations(program) {
715
+ const violations = [];
716
+ let insideClip = false;
717
+ program.steps.forEach((step, index) => {
718
+ if (step.kind === 'BEGIN_CLIP') {
719
+ insideClip = true;
720
+ return;
721
+ }
722
+ if (step.kind === 'END_CLIP') {
723
+ insideClip = false;
724
+ return;
725
+ }
726
+ if (insideClip && step.kind === 'NAVIGATE') {
727
+ violations.push({
728
+ stepIndex: index,
729
+ message: `NAVIGATE at step ${index} is inside a clip (between BEGIN_CLIP and END_CLIP) and would cause a ` +
730
+ `white-flash cut in the recording. Move it before BEGIN_CLIP (off-camera warmup), or CLICK an ` +
731
+ `in-app <Link> the router intercepts instead.`,
732
+ });
733
+ }
734
+ });
735
+ return violations;
736
+ }
737
+ /**
738
+ * Hard-rejection variant for authoring boundaries (preset create/update).
739
+ * Throws with an actionable message if any NAVIGATE sits inside a clip.
740
+ */
741
+ export function assertNoNavigateInClip(program) {
742
+ const violations = findNavigateInClipViolations(program);
743
+ if (violations.length > 0) {
744
+ throw new Error(violations.map((v) => v.message).join(' '));
745
+ }
695
746
  }
696
747
  //# sourceMappingURL=execution-schema.js.map
@@ -487,6 +487,14 @@ export interface PreconditionSpec {
487
487
  domain: string;
488
488
  path?: string;
489
489
  }>;
490
+ /**
491
+ * Active AutoKap Scenario id (AUT-239). When set and `AUTOKAP_SCENARIO_SECRET`
492
+ * is configured, the runner injects a signed `__ak_scenario=<id>.<sig>` cookie
493
+ * before navigation, so the client app's cooperative scenario layer reads it
494
+ * server-side (SSR-safe) and serves the named state's fixtures. Inert if the
495
+ * secret is absent.
496
+ */
497
+ scenario?: string;
490
498
  }
491
499
  export declare const MEDIA_MODES: readonly ["screenshot", "clip", "video"];
492
500
  export type MediaMode = (typeof MEDIA_MODES)[number];
@@ -534,7 +542,26 @@ export interface ArtifactSpec {
534
542
  }
535
543
  export interface ExecutionProgram {
536
544
  presetId: string;
545
+ /**
546
+ * Content-revision counter, bumped ONLY by the healer after a selector repair
547
+ * (program-patcher.ts). Orthogonal to `programSchemaVersion` — it tracks
548
+ * content changes, never the form of the program.
549
+ */
537
550
  programVersion: number;
551
+ /**
552
+ * FORM version of the program, driving migrate-on-read (program-migrations.ts).
553
+ * Optional on the type so hand-built literals (fixtures) stay terse; guaranteed
554
+ * present at runtime by `upgradeProgram` (run inside parseProgram), which stamps
555
+ * it to the current value. Absent = v0 (oldest form). Read with
556
+ * `?? CURRENT_PROGRAM_SCHEMA_VERSION` when stamping.
557
+ */
558
+ programSchemaVersion?: number;
559
+ /**
560
+ * Provenance stamp: the engine semantics version the generator compiled this
561
+ * program against. Informational only — the runtime engine always applies the
562
+ * current semantics regardless. Absent = legacy (pre-versioning) program.
563
+ */
564
+ engineVersion?: number;
538
565
  mediaMode: MediaMode;
539
566
  baseUrl: string;
540
567
  /** Server-resolved concurrency cap for this run, derived from the owner's plan. */
@@ -673,6 +700,22 @@ export interface ArtifactResult {
673
700
  /** Favicon extracted from the captured page */
674
701
  tabIconData?: Buffer;
675
702
  tabIconMimeType?: string;
703
+ /**
704
+ * AUT-240 (Layer 4): the capture was produced under a degraded signal — an
705
+ * AKTree probe that kept throwing was assumed-OK as a last resort, or the page
706
+ * never reached a visually-stable state. "Assume OK, but flag it." Q4 decision:
707
+ * produce-only for now (a downstream consumer in gallery / post-capture
708
+ * verification is a later phase); no LLM is forced off this flag.
709
+ */
710
+ lowConfidence?: boolean;
711
+ /** Why the artifact was flagged low-confidence (human-readable). */
712
+ lowConfidenceReason?: string;
713
+ /**
714
+ * AUT-241 — navigation-watcher warnings captured while this clip/video was
715
+ * recording (e.g. a full document load mid-take = white flash + cursor loss).
716
+ * Carried up to `RunResult.warnings`; diagnostic only, never fails the run.
717
+ */
718
+ warnings?: string[];
676
719
  }
677
720
  export type LLMStepType = 'capture_verification' | 'alt_text_generation' | 'healer_invocation';
678
721
  export interface LLMStepUsage {
@@ -763,6 +806,13 @@ export interface RunResult {
763
806
  * with the version actually captured on the page.
764
807
  */
765
808
  detectedAppVersion?: string | null;
809
+ /**
810
+ * AUT-241 — non-fatal warnings aggregated from every clip/video recording in
811
+ * the run (full document loads mid-take, unexpected page-side navigations).
812
+ * Empty/undefined when nothing was flagged. Anti-cut policy surfaces these
813
+ * instead of masking the cut; deployed presets are grandfathered at run.
814
+ */
815
+ warnings?: string[];
766
816
  error?: string;
767
817
  }
768
818
  export interface WaitCondition {
@@ -770,6 +820,38 @@ export interface WaitCondition {
770
820
  state: 'visible' | 'attached';
771
821
  timeoutMs: number;
772
822
  }
823
+ /**
824
+ * Cheap, side-effect-free snapshot of page activity (AUT-240, Layer C).
825
+ * Compared across polls by the runner's progress watchdog to distinguish a
826
+ * slow-but-progressing page (extend the wait) from a genuinely stuck one (cut).
827
+ */
828
+ export interface ProgressSnapshot {
829
+ /**
830
+ * Monotonic count of FIRST-PARTY network lifecycle events
831
+ * (request/finished/failed) — same site as the live page origin. Third-party
832
+ * telemetry is excluded so it cannot masquerade as progress.
833
+ */
834
+ networkEventCount: number;
835
+ /** First-party requests issued but not yet finished or failed. */
836
+ inflightRequests: number;
837
+ /** `Date.now()` of the last observed first-party network lifecycle event. */
838
+ lastNetworkActivityAtMs: number;
839
+ /** `document.readyState`, or 'unknown' if unreadable. */
840
+ readyState: string;
841
+ /** Total element count — a cheap DOM-churn signal — or -1 if unreadable. */
842
+ domNodeCount: number;
843
+ /** True when the readability probe threw (itself a sign of navigation). */
844
+ navigating?: boolean;
845
+ }
846
+ /** Result of `RuntimeAdapter.waitForVisuallyStable` (AUT-240, Layer B). */
847
+ export interface VisualStabilityResult {
848
+ /** Whether the page reached a clean, stable state before the deadline. */
849
+ stable: boolean;
850
+ /** Human-readable explanation (which signal stayed noisy, if any). */
851
+ reason: string;
852
+ /** Total time spent stabilizing (ms). */
853
+ waitedMs: number;
854
+ }
773
855
  export interface ClickOptions {
774
856
  /** Force click even if element is covered */
775
857
  force?: boolean;
@@ -822,6 +904,12 @@ export interface RecordingResult {
822
904
  durationMs: number;
823
905
  mimeType: string;
824
906
  trimStartMs?: number;
907
+ /**
908
+ * AUT-241 — human-readable warnings collected by the navigation watcher
909
+ * during this recording window (e.g. a full document load = white flash +
910
+ * cursor loss). Surfaced up to `RunResult.warnings`; never fails the run.
911
+ */
912
+ warnings?: string[];
825
913
  }
826
914
  export interface TypeOptions {
827
915
  /**
@@ -887,6 +975,14 @@ export interface RuntimeAdapter {
887
975
  * adapters that cannot resolve a title should leave this method off.
888
976
  */
889
977
  getPageTitle?(): Promise<string | null>;
978
+ /**
979
+ * Text content of the first element matching `selector`, read live from the
980
+ * DOM via Playwright (`locator().first().textContent()`). Preferred over the
981
+ * AKTree for `text_contains` postconditions (AUT-240, Layer A). Returns null
982
+ * if the selector misses or the read fails. Optional — adapters without it
983
+ * make `text_contains` fall back to the AKTree.
984
+ */
985
+ getTextContent?(selector: string): Promise<string | null>;
890
986
  /**
891
987
  * Read the captured app's version from the live page (meta tag, window
892
988
  * global, or data attribute). Mirrors `extractAppVersionFromHtml` server-side
@@ -989,4 +1085,24 @@ export interface RuntimeAdapter {
989
1085
  clickHidden?(opts: {
990
1086
  selector: string;
991
1087
  }): Promise<void>;
1088
+ /**
1089
+ * Cheap snapshot of page activity used by the runner's progress watchdog to
1090
+ * decide whether a wait is "slow-but-progressing" (keep waiting) or "stuck"
1091
+ * (cut). Must never reject — implementations catch internally and set
1092
+ * `navigating: true` when the readability probe throws. Adapters that cannot
1093
+ * provide it leave it off; the runner then falls back to fixed budgets.
1094
+ */
1095
+ getProgressSnapshot?(): Promise<ProgressSnapshot>;
1096
+ /**
1097
+ * Wait for the page to be visually stable before a screenshot (Layer B):
1098
+ * fonts ready, images loaded, no semantic loaders ([aria-busy]/progressbar)
1099
+ * visible, DOM quiet, with a bounded pixel-convergence fallback. Best-effort —
1100
+ * never throws and never blocks the capture: when it cannot reach a clean
1101
+ * state it returns `stable: false` with a reason and the runner captures
1102
+ * anyway. Adapters that cannot provide it leave it off; the runner falls back
1103
+ * to the legacy `smartWaitForStability`.
1104
+ */
1105
+ waitForVisuallyStable?(options?: {
1106
+ maxWaitMs?: number;
1107
+ }): Promise<VisualStabilityResult>;
992
1108
  }
@@ -5,7 +5,7 @@
5
5
  * Executes opcodes sequentially, verifies postconditions,
6
6
  * delegates to recovery chain on failure, and respects circuit breaker.
7
7
  */
8
- import type { ExecutionProgram, ExecutionOpcode, RuntimeAdapter, OpcodeResultStatus, RunResult, HealerPatch, VariantSpec } from './execution-types.js';
8
+ import type { ExecutionProgram, ExecutionOpcode, RuntimeAdapter, OpcodeResultStatus, RunResult, HealerPatch, VariantSpec, ProgressSnapshot } from './execution-types.js';
9
9
  import type { LLMProviderConfig, LLMCallResult } from './llm-provider.js';
10
10
  export interface RecoveryChain {
11
11
  attempt(failedOpcode: ExecutionOpcode, opcodeIndex: number, adapter: RuntimeAdapter, options?: RecoveryAttemptOptions): Promise<RecoveryAttemptResult>;
@@ -19,6 +19,13 @@ export interface RecoveryAttemptResult {
19
19
  }
20
20
  export interface RecoveryAttemptOptions {
21
21
  remainingTimeMs?: number;
22
+ /**
23
+ * AUT-240 (Phase 5): absolute global wait deadline. Lets recovery re-checks
24
+ * extend-on-progress (like the main path) instead of replaying a fixed budget.
25
+ */
26
+ globalDeadlineMs?: number;
27
+ /** Progress probe for the recovery watchdog (omitted ⇒ fixed budgets). */
28
+ getProgress?: () => Promise<ProgressSnapshot | null>;
22
29
  maxDeterministicRetries?: number;
23
30
  currentVariant?: VariantSpec;
24
31
  allowPageReload?: boolean;
@@ -6,7 +6,8 @@
6
6
  * delegates to recovery chain on failure, and respects circuit breaker.
7
7
  */
8
8
  import { isSoftOpcodeKind } from './execution-types.js';
9
- import { evaluatePostcondition } from './postcondition.js';
9
+ import { evaluatePostcondition, evaluatePostconditionWithProgress } from './postcondition.js';
10
+ import { WAIT_CONTRACT_VERSION, resolveGlobalWaitDeadlineMs, runWithProgressBudget, } from './wait-contract.js';
10
11
  import { ActionVerifier } from './action-verifier.js';
11
12
  import { CircuitBreaker } from './circuit-breaker.js';
12
13
  import { smartWaitForStability } from './smart-wait.js';
@@ -98,6 +99,14 @@ function resolveRuntimePostcondition(opcode) {
98
99
  }
99
100
  return opcode.postcondition;
100
101
  }
102
+ /** Mark the variant low-confidence once (keeps the first reason). */
103
+ function recordLowConfidence(state, reason) {
104
+ if (state.lowConfidence)
105
+ return;
106
+ state.lowConfidence = true;
107
+ state.lowConfidenceReason = reason;
108
+ logger.debug(`[run] low-confidence flagged: ${reason}`);
109
+ }
101
110
  // ── Main execution function ─────────────────────────────────────────
102
111
  export async function executeProgram(program, createAdapter, options = {}) {
103
112
  const recoveryChain = options.recoveryChain ?? new NoOpRecoveryChain();
@@ -117,6 +126,7 @@ export async function executeProgram(program, createAdapter, options = {}) {
117
126
  healerInvocations: 0,
118
127
  circuitBreakerTrips: 0,
119
128
  };
129
+ logger.debug(`[run] wait contract v${WAIT_CONTRACT_VERSION} (adaptive budgets + visual stability)`);
120
130
  let nextVariantIndex = 0;
121
131
  const workerCount = Math.min(maxParallelVariants, program.variants.length);
122
132
  const workers = Array.from({ length: workerCount }, async () => {
@@ -148,6 +158,9 @@ export async function executeProgram(program, createAdapter, options = {}) {
148
158
  const aborted = options.abortSignal?.aborted && completedVariantResults.length < program.variants.length;
149
159
  const success = !aborted && completedVariantResults.length > 0 && completedVariantResults.every(v => v.success);
150
160
  const detectedAppVersion = completedVariantResults.reduce((acc, variantResult) => acc ?? (variantResult.detectedAppVersion ?? null), null);
161
+ // AUT-241 — surface (don't mask) cuts: aggregate every recording warning from
162
+ // each variant's clip/video artifacts. Diagnostic only; never affects success.
163
+ const aggregatedWarnings = completedVariantResults.flatMap((v) => v.artifacts.flatMap((a) => a.warnings ?? []));
151
164
  return {
152
165
  programId: program.presetId,
153
166
  success,
@@ -157,6 +170,7 @@ export async function executeProgram(program, createAdapter, options = {}) {
157
170
  opcodeTimings,
158
171
  totalDurationMs: Date.now() - startTime,
159
172
  detectedAppVersion,
173
+ warnings: aggregatedWarnings.length ? aggregatedWarnings : undefined,
160
174
  error: aborted ? 'aborted' : (success ? undefined : completedVariantResults.find(v => !v.success)?.error),
161
175
  };
162
176
  }
@@ -302,6 +316,13 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
302
316
  const startTime = Date.now();
303
317
  const effectiveTimeoutMs = resolveOpcodeTimeoutMs(opcode);
304
318
  const deadlineMs = startTime + effectiveTimeoutMs;
319
+ // AUT-240 (Layer C): a hard, per-media global deadline for adaptive waits,
320
+ // separate from the compiled per-opcode timeout. The compiled timeout acts as
321
+ // a FLOOR (never a ceiling), so `WAIT_FOR` and postconditions can extend while
322
+ // the page is still progressing, up to this deadline. Interactions stay on the
323
+ // narrow `deadlineMs` (Playwright auto-waiting handles those).
324
+ const globalDeadlineMs = resolveGlobalWaitDeadlineMs(startTime, effectiveTimeoutMs, artifactPlan.mediaMode);
325
+ const getProgress = makeProgressGetter(adapter);
305
326
  const actionEffectPolicy = getOpcodeActionEffectPolicy(opcode);
306
327
  const isSoft = isSoftOpcodeKind(opcode.kind);
307
328
  // Track page context for circuit breaker
@@ -318,21 +339,41 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
318
339
  await verifier.captureBeforeState(adapter);
319
340
  logger.debug(`[opcode ${index}] captureBeforeState took ${Date.now() - beforeStart}ms`);
320
341
  }
321
- const actionBudgetMs = getRemainingTimeMs(deadlineMs);
342
+ // `WAIT_FOR` is a pure wait: it extends while the page is progressing, up to
343
+ // the global deadline. All other opcodes are interactions bounded by the
344
+ // narrow per-opcode deadline (Playwright auto-waiting covers them).
345
+ const isPureWait = opcode.kind === 'WAIT_FOR';
346
+ const actionDeadlineMs = isPureWait ? globalDeadlineMs : deadlineMs;
347
+ const actionBudgetMs = getRemainingTimeMs(actionDeadlineMs);
322
348
  if (actionBudgetMs <= 0) {
323
349
  const reason = `timeout after ${effectiveTimeoutMs}ms`;
324
- logger.debug(`[opcode ${index}] no budget left after captureBeforeState (deadline=${deadlineMs}, now=${Date.now()})`);
350
+ logger.debug(`[opcode ${index}] no budget left after captureBeforeState (deadline=${actionDeadlineMs}, now=${Date.now()})`);
325
351
  if (isSoft)
326
352
  return softSkipResult(opcode, index, startTime, reason, telemetry);
327
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
353
+ return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
328
354
  }
329
355
  // For mediaMode='video', capture pre-action timing + bbox metadata inside
330
356
  // the active clip window only. Opcodes outside a clip are not part of the
331
357
  // video output.
332
358
  const preTiming = await capturePreActionTiming(opcode, adapter, executionState.activeClip, artifactPlan);
333
- logger.debug(`[opcode ${index}] action exec start — actionBudget ${actionBudgetMs}ms`);
359
+ logger.debug(`[opcode ${index}] action exec start — actionBudget ${actionBudgetMs}ms${isPureWait ? ' (adaptive)' : ''}`);
334
360
  const actionStart = Date.now();
335
- const result = await withTimeout(() => executeOpcodeAction(opcode, index, adapter, artifacts, telemetry, currentVariant, executionState, artifactPlan, mockDataGroups, options, credentials), actionBudgetMs);
361
+ let result;
362
+ if (opcode.kind === 'WAIT_FOR' && getProgress) {
363
+ // Extend-on-progress: give the wait a generous budget (up to the global
364
+ // deadline) and let the watchdog cut it early only when the page is truly
365
+ // stuck. A slow-but-progressing page no longer trips a fixed timeout.
366
+ const waited = await runWithProgressBudget((budgetMs) => executeOpcodeAction({ ...opcode, timeoutMs: Math.max(1, Math.round(budgetMs)) }, index, adapter, artifacts, telemetry, currentVariant, executionState, artifactPlan, mockDataGroups, options, credentials), { startedAtMs: startTime, globalDeadlineMs, minBudgetMs: effectiveTimeoutMs, getProgress });
367
+ result = waited.result ?? {
368
+ success: false,
369
+ error: waited.cut === 'stuck'
370
+ ? `WAIT_FOR cut: page stuck (no progress for ${Math.round(waited.waitedMs)}ms)`
371
+ : `WAIT_FOR cut: global deadline reached, page never settled`,
372
+ };
373
+ }
374
+ else {
375
+ result = await withTimeout(() => executeOpcodeAction(opcode, index, adapter, artifacts, telemetry, currentVariant, executionState, artifactPlan, mockDataGroups, options, credentials), actionBudgetMs);
376
+ }
336
377
  logger.debug(`[opcode ${index}] action exec end — took ${Date.now() - actionStart}ms, success=${result.success}${result.error ? `, error=${result.error}` : ''}`);
337
378
  if (preTiming) {
338
379
  const keystrokeOffsetsMs = result.keystrokeTimestampsMs && result.keystrokeTimestampsMs.length > 0
@@ -358,39 +399,46 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
358
399
  const reason = result.error ?? 'action failed';
359
400
  if (isSoft)
360
401
  return softSkipResult(opcode, index, startTime, reason, telemetry);
361
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
402
+ return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
362
403
  }
363
- // Verify postcondition
364
- const postconditionBudgetMs = getRemainingTimeMs(deadlineMs);
404
+ // Verify postcondition — extend-on-progress up to the global deadline so a
405
+ // slow action no longer starves it (failure mode #3: clamped to ~1ms).
406
+ const postconditionBudgetMs = getRemainingTimeMs(globalDeadlineMs);
365
407
  if (postconditionBudgetMs <= 0) {
366
408
  const reason = `timeout after ${effectiveTimeoutMs}ms`;
367
409
  logger.debug(`[opcode ${index}] no budget left for postcondition check`);
368
410
  if (isSoft)
369
411
  return softSkipResult(opcode, index, startTime, reason, telemetry);
370
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
412
+ return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
371
413
  }
372
414
  const runtimePostcondition = resolveRuntimePostcondition(opcode);
373
415
  const postStart = Date.now();
374
- const postcondition = await evaluatePostcondition(adapter, withClampedPostconditionTimeout(runtimePostcondition, postconditionBudgetMs));
416
+ const postcondition = await evaluatePostconditionWithProgress(adapter, runtimePostcondition, postStart, globalDeadlineMs, getProgress);
375
417
  logger.debug(`[opcode ${index}] postcondition (${runtimePostcondition.type}) took ${Date.now() - postStart}ms — passed=${postcondition.passed}, reason="${postcondition.reason}"`);
418
+ if (postcondition.lowConfidence) {
419
+ recordLowConfidence(executionState, `postcondition ${runtimePostcondition.type}: ${postcondition.reason}`);
420
+ }
376
421
  if (!postcondition.passed) {
377
422
  const reason = `postcondition failed: ${postcondition.reason}`;
378
423
  if (isSoft)
379
424
  return softSkipResult(opcode, index, startTime, reason, telemetry);
380
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
425
+ return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
381
426
  }
382
427
  // Verify action effects through the shared policy. Weak `any_change`
383
428
  // postconditions are only meaningful if this verifier observes a real
384
429
  // URL/tree/state/scroll change.
385
430
  if (actionEffectPolicy.captureBefore) {
386
431
  const verification = await verifier.verifyAfterAction(adapter);
432
+ if (verification.lowConfidence) {
433
+ recordLowConfidence(executionState, `action effect unverified: ${verification.summary}`);
434
+ }
387
435
  if (!verification.hadEffect && actionEffectPolicy.requireEffect) {
388
436
  if (opcode.kind === 'PRESS_KEY' && actionEffectPolicy.noEffectMode === 'allow') {
389
437
  logger.debug(`[opcode ${index}] PRESS_KEY had no DOM effect (${verification.summary}) — ` +
390
438
  `postcondition passed, treating as redundant-but-successful`);
391
439
  }
392
440
  else {
393
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, `action had no effect: ${verification.summary}`);
441
+ return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, `action had no effect: ${verification.summary}`);
394
442
  }
395
443
  }
396
444
  }
@@ -424,7 +472,7 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
424
472
  const errorMsg = err instanceof Error ? err.message : String(err);
425
473
  if (isSoft)
426
474
  return softSkipResult(opcode, index, startTime, errorMsg, telemetry);
427
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, errorMsg);
475
+ return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, errorMsg);
428
476
  }
429
477
  }
430
478
  /** Post-action breathing room (ms) injected between visible interactions
@@ -450,8 +498,9 @@ function sleep(ms) {
450
498
  return new Promise((resolve) => setTimeout(resolve, ms));
451
499
  }
452
500
  // ── Failure handling with recovery ──────────────────────────────────
453
- async function handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, errorMsg) {
501
+ async function handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, errorMsg) {
454
502
  const actionEffectPolicy = getOpcodeActionEffectPolicy(opcode);
503
+ const getProgress = makeProgressGetter(adapter);
455
504
  const breakerState = breaker.recordFailure(index, opcode.maxFailures);
456
505
  if (breakerState.tripped) {
457
506
  telemetry.circuitBreakerTrips++;
@@ -464,7 +513,11 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
464
513
  error: `${errorMsg} (circuit breaker: ${breakerState.reason})`,
465
514
  };
466
515
  }
467
- const remainingTimeMs = getRemainingTimeMs(deadlineMs);
516
+ // AUT-240 (Phase 5): recovery budgets to the GLOBAL deadline, not the narrow
517
+ // compiled one. Otherwise an adaptive WAIT_FOR that consumed its compiled
518
+ // budget would leave zero for recovery (failure mode #1) — and selector-repair
519
+ // / healer strategies that could actually fix the failure would never run.
520
+ const remainingTimeMs = getRemainingTimeMs(globalDeadlineMs);
468
521
  if (remainingTimeMs <= 0) {
469
522
  return {
470
523
  opcodeIndex: index,
@@ -490,6 +543,8 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
490
543
  }
491
544
  const recovery = await recoveryChain.attempt(opcode, index, adapter, {
492
545
  remainingTimeMs,
546
+ globalDeadlineMs,
547
+ getProgress,
493
548
  maxDeterministicRetries: Math.max(0, opcode.maxFailures - breakerState.opcodeFailures),
494
549
  currentVariant,
495
550
  allowPageReload: !executionState.activeClip,
@@ -511,7 +566,7 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
511
566
  if (recovery.patch) {
512
567
  healerPatches.push(recovery.patch);
513
568
  }
514
- const postconditionBudgetMs = getRemainingTimeMs(deadlineMs);
569
+ const postconditionBudgetMs = getRemainingTimeMs(globalDeadlineMs);
515
570
  if (postconditionBudgetMs <= 0) {
516
571
  return {
517
572
  opcodeIndex: index,
@@ -523,7 +578,10 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
523
578
  };
524
579
  }
525
580
  const runtimePostcondition = resolveRuntimePostcondition(opcode);
526
- const postcondition = await evaluatePostcondition(adapter, withClampedPostconditionTimeout(runtimePostcondition, postconditionBudgetMs));
581
+ const postcondition = await evaluatePostconditionWithProgress(adapter, runtimePostcondition, Date.now(), globalDeadlineMs, getProgress);
582
+ if (postcondition.lowConfidence) {
583
+ recordLowConfidence(executionState, `postcondition ${runtimePostcondition.type}: ${postcondition.reason}`);
584
+ }
527
585
  if (!postcondition.passed) {
528
586
  return {
529
587
  opcodeIndex: index,
@@ -536,6 +594,9 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
536
594
  }
537
595
  if (actionEffectPolicy.captureBefore) {
538
596
  const verification = await verifier.verifyAfterAction(adapter);
597
+ if (verification.lowConfidence) {
598
+ recordLowConfidence(executionState, `action effect unverified: ${verification.summary}`);
599
+ }
539
600
  if (!verification.hadEffect && actionEffectPolicy.requireEffect) {
540
601
  return {
541
602
  opcodeIndex: index,
@@ -611,13 +672,34 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
611
672
  assertSurfacePostconditionSource(opcode);
612
673
  return evaluateSurfaceAssertion(adapter, opcode.selectors, opcode.matchAll);
613
674
  case 'CAPTURE_SCREENSHOT': {
614
- const stability = await smartWaitForStability(adapter, { maxWaitMs: 5000 });
675
+ // AUT-240 (Layer B): stabilize visually before capture but NEVER fail the
676
+ // capture on it. Prefer the adapter's adaptive stabilizer (fonts/images/
677
+ // semantic loaders/DOM-quiet + bounded pixel fallback); fall back to the
678
+ // legacy smart-wait for adapters that don't implement it. A page that
679
+ // never fully settles (e.g. a perpetual animation) is captured anyway.
680
+ const stabilize = (maxWaitMs) => adapter.waitForVisuallyStable
681
+ ? adapter.waitForVisuallyStable({ maxWaitMs })
682
+ : smartWaitForStability(adapter, { maxWaitMs }).then((r) => ({
683
+ stable: r.stable,
684
+ reason: r.waitedFor.join(', ') || 'unknown',
685
+ }));
686
+ const stability = await stabilize(5000);
687
+ if (!stability.stable) {
688
+ logger.debug(`[opcode ${opcodeIndex}] capturing despite unstable page: ${stability.reason}`);
689
+ }
690
+ // AUT-240 (Layer 4): flag the capture low-confidence if a faux-vert was
691
+ // assumed-OK earlier in this variant, or if the page never reached a
692
+ // visually-stable state before this shot. "Assume OK, but flag it." Keep
693
+ // both reasons when both apply (don't let one mask the other).
694
+ const captureLowConfidence = Boolean(executionState.lowConfidence) || !stability.stable;
695
+ const lowConfidenceReasons = [];
696
+ if (executionState.lowConfidence && executionState.lowConfidenceReason) {
697
+ lowConfidenceReasons.push(executionState.lowConfidenceReason);
698
+ }
615
699
  if (!stability.stable) {
616
- return {
617
- success: false,
618
- error: `page not stable before screenshot; unresolved loaders: ${stability.waitedFor.join(', ') || 'unknown'}`,
619
- };
700
+ lowConfidenceReasons.push(`captured before visual stability: ${stability.reason}`);
620
701
  }
702
+ const captureLowConfidenceReason = lowConfidenceReasons.join('; ') || undefined;
621
703
  const captureUrl = await adapter.getCurrentUrl();
622
704
  const takeBuffer = async () => {
623
705
  if (opcode.elementSelector && adapter.takeElementScreenshot) {
@@ -648,7 +730,7 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
648
730
  });
649
731
  }
650
732
  if (!verification.passed) {
651
- await smartWaitForStability(adapter, { maxWaitMs: 8000 });
733
+ await stabilize(8000);
652
734
  const retryBuffer = await takeBuffer();
653
735
  const retryVerification = await verifyCaptureQuality(retryBuffer, {
654
736
  expectedDescription: opcode.description,
@@ -733,6 +815,8 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
733
815
  variantId: currentVariant?.id,
734
816
  tabIconData,
735
817
  tabIconMimeType,
818
+ lowConfidence: captureLowConfidence || undefined,
819
+ lowConfidenceReason: captureLowConfidence ? captureLowConfidenceReason : undefined,
736
820
  });
737
821
  break;
738
822
  }
@@ -781,6 +865,8 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
781
865
  stepDescription: opcode.description,
782
866
  stepIndex: opcodeIndex,
783
867
  variantId: currentVariant?.id,
868
+ // AUT-241 — full-load / unexpected-nav warnings seen during this take.
869
+ warnings: recording.warnings,
784
870
  });
785
871
  break;
786
872
  }
@@ -888,11 +974,16 @@ function resolveClipIdentity(activeClip, opcode) {
888
974
  clipName: opcode.clipName ?? activeClip?.clipName ?? opcode.description,
889
975
  };
890
976
  }
891
- function withClampedPostconditionTimeout(spec, maxWaitMs) {
892
- return {
893
- ...spec,
894
- waitMs: Math.max(1, Math.min(spec.waitMs ?? maxWaitMs, maxWaitMs)),
895
- };
977
+ /**
978
+ * Bind the adapter's optional progress probe for the watchdog, or return
979
+ * undefined when the adapter has none (graceful degradation: waits then fall
980
+ * back to fixed compiled budgets). See `runWithProgressBudget`.
981
+ */
982
+ function makeProgressGetter(adapter) {
983
+ if (!adapter.getProgressSnapshot)
984
+ return undefined;
985
+ const getSnapshot = adapter.getProgressSnapshot.bind(adapter);
986
+ return () => getSnapshot();
896
987
  }
897
988
  function evaluateImmediateAssertion(result, prefix) {
898
989
  return result.passed