npm - autokap - Versions diffs - 1.8.6 → 1.8.8 - Mend

autokap 1.8.6 → 1.8.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

package/dist/action-verifier.d.ts +6 -0
package/dist/action-verifier.js +30 -17
package/dist/browser.d.ts +59 -0
package/dist/browser.js +259 -0
package/dist/cli-config.js +7 -12
package/dist/cli-contract.d.ts +5 -9
package/dist/cli-contract.js +11 -38
package/dist/cli-runner.d.ts +0 -1
package/dist/cli-runner.js +74 -59
package/dist/cli.js +7 -7
package/dist/clip-capture-loop.d.ts +28 -7
package/dist/clip-capture-loop.js +102 -19
package/dist/engine-version.d.ts +24 -0
package/dist/engine-version.js +25 -0
package/dist/execution-schema.d.ts +22 -0
package/dist/execution-schema.js +59 -8
package/dist/execution-types.d.ts +116 -0
package/dist/opcode-runner.d.ts +8 -1
package/dist/opcode-runner.js +120 -29
package/dist/postcondition.d.ts +18 -3
package/dist/postcondition.js +75 -27
package/dist/program-hash.d.ts +11 -0
package/dist/program-hash.js +28 -0
package/dist/program-migrations.d.ts +31 -0
package/dist/program-migrations.js +93 -0
package/dist/program-signing.d.ts +11 -0
package/dist/program-signing.js +1 -0
package/dist/recovery-chain.js +8 -11
package/dist/scenario-cookie.d.ts +36 -0
package/dist/scenario-cookie.js +62 -0
package/dist/security.d.ts +21 -0
package/dist/security.js +46 -8
package/dist/server-credit-usage.d.ts +1 -1
package/dist/version.d.ts +1 -0
package/dist/version.js +1 -0
package/dist/video-narration-schema.d.ts +3 -0
package/dist/video-narration-schema.js +3 -0
package/dist/wait-contract.d.ts +104 -0
package/dist/wait-contract.js +144 -0
package/dist/web-playwright-local.d.ts +9 -1
package/dist/web-playwright-local.js +0 -0
package/package.json +2 -2
package/readme.md +9 -15

package/dist/opcode-runner.js CHANGED Viewed

@@ -6,7 +6,8 @@
  * delegates to recovery chain on failure, and respects circuit breaker.
  */
 import { isSoftOpcodeKind } from './execution-types.js';
-import { evaluatePostcondition } from './postcondition.js';
+import { evaluatePostcondition, evaluatePostconditionWithProgress } from './postcondition.js';
+import { WAIT_CONTRACT_VERSION, resolveGlobalWaitDeadlineMs, runWithProgressBudget, } from './wait-contract.js';
 import { ActionVerifier } from './action-verifier.js';
 import { CircuitBreaker } from './circuit-breaker.js';
 import { smartWaitForStability } from './smart-wait.js';
@@ -98,6 +99,14 @@ function resolveRuntimePostcondition(opcode) {
     }
     return opcode.postcondition;
 }
+/** Mark the variant low-confidence once (keeps the first reason). */
+function recordLowConfidence(state, reason) {
+    if (state.lowConfidence)
+        return;
+    state.lowConfidence = true;
+    state.lowConfidenceReason = reason;
+    logger.debug(`[run] low-confidence flagged: ${reason}`);
+}
 // ── Main execution function ─────────────────────────────────────────
 export async function executeProgram(program, createAdapter, options = {}) {
     const recoveryChain = options.recoveryChain ?? new NoOpRecoveryChain();
@@ -117,6 +126,7 @@ export async function executeProgram(program, createAdapter, options = {}) {
         healerInvocations: 0,
         circuitBreakerTrips: 0,
     };
+    logger.debug(`[run] wait contract v${WAIT_CONTRACT_VERSION} (adaptive budgets + visual stability)`);
     let nextVariantIndex = 0;
     const workerCount = Math.min(maxParallelVariants, program.variants.length);
     const workers = Array.from({ length: workerCount }, async () => {
@@ -148,6 +158,9 @@ export async function executeProgram(program, createAdapter, options = {}) {
     const aborted = options.abortSignal?.aborted && completedVariantResults.length < program.variants.length;
     const success = !aborted && completedVariantResults.length > 0 && completedVariantResults.every(v => v.success);
     const detectedAppVersion = completedVariantResults.reduce((acc, variantResult) => acc ?? (variantResult.detectedAppVersion ?? null), null);
+    // AUT-241 — surface (don't mask) cuts: aggregate every recording warning from
+    // each variant's clip/video artifacts. Diagnostic only; never affects success.
+    const aggregatedWarnings = completedVariantResults.flatMap((v) => v.artifacts.flatMap((a) => a.warnings ?? []));
     return {
         programId: program.presetId,
         success,
@@ -157,6 +170,7 @@ export async function executeProgram(program, createAdapter, options = {}) {
         opcodeTimings,
         totalDurationMs: Date.now() - startTime,
         detectedAppVersion,
+        warnings: aggregatedWarnings.length ? aggregatedWarnings : undefined,
         error: aborted ? 'aborted' : (success ? undefined : completedVariantResults.find(v => !v.success)?.error),
     };
 }
@@ -302,6 +316,13 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
     const startTime = Date.now();
     const effectiveTimeoutMs = resolveOpcodeTimeoutMs(opcode);
     const deadlineMs = startTime + effectiveTimeoutMs;
+    // AUT-240 (Layer C): a hard, per-media global deadline for adaptive waits,
+    // separate from the compiled per-opcode timeout. The compiled timeout acts as
+    // a FLOOR (never a ceiling), so `WAIT_FOR` and postconditions can extend while
+    // the page is still progressing, up to this deadline. Interactions stay on the
+    // narrow `deadlineMs` (Playwright auto-waiting handles those).
+    const globalDeadlineMs = resolveGlobalWaitDeadlineMs(startTime, effectiveTimeoutMs, artifactPlan.mediaMode);
+    const getProgress = makeProgressGetter(adapter);
     const actionEffectPolicy = getOpcodeActionEffectPolicy(opcode);
     const isSoft = isSoftOpcodeKind(opcode.kind);
     // Track page context for circuit breaker
@@ -318,21 +339,41 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
             await verifier.captureBeforeState(adapter);
             logger.debug(`[opcode ${index}] captureBeforeState took ${Date.now() - beforeStart}ms`);
         }
-        const actionBudgetMs = getRemainingTimeMs(deadlineMs);
+        // `WAIT_FOR` is a pure wait: it extends while the page is progressing, up to
+        // the global deadline. All other opcodes are interactions bounded by the
+        // narrow per-opcode deadline (Playwright auto-waiting covers them).
+        const isPureWait = opcode.kind === 'WAIT_FOR';
+        const actionDeadlineMs = isPureWait ? globalDeadlineMs : deadlineMs;
+        const actionBudgetMs = getRemainingTimeMs(actionDeadlineMs);
         if (actionBudgetMs <= 0) {
             const reason = `timeout after ${effectiveTimeoutMs}ms`;
-            logger.debug(`[opcode ${index}] no budget left after captureBeforeState (deadline=${deadlineMs}, now=${Date.now()})`);
+            logger.debug(`[opcode ${index}] no budget left after captureBeforeState (deadline=${actionDeadlineMs}, now=${Date.now()})`);
             if (isSoft)
                 return softSkipResult(opcode, index, startTime, reason, telemetry);
-            return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
+            return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
         }
         // For mediaMode='video', capture pre-action timing + bbox metadata inside
         // the active clip window only. Opcodes outside a clip are not part of the
         // video output.
         const preTiming = await capturePreActionTiming(opcode, adapter, executionState.activeClip, artifactPlan);
-        logger.debug(`[opcode ${index}] action exec start — actionBudget ${actionBudgetMs}ms`);
+        logger.debug(`[opcode ${index}] action exec start — actionBudget ${actionBudgetMs}ms${isPureWait ? ' (adaptive)' : ''}`);
         const actionStart = Date.now();
-        const result = await withTimeout(() => executeOpcodeAction(opcode, index, adapter, artifacts, telemetry, currentVariant, executionState, artifactPlan, mockDataGroups, options, credentials), actionBudgetMs);
+        let result;
+        if (opcode.kind === 'WAIT_FOR' && getProgress) {
+            // Extend-on-progress: give the wait a generous budget (up to the global
+            // deadline) and let the watchdog cut it early only when the page is truly
+            // stuck. A slow-but-progressing page no longer trips a fixed timeout.
+            const waited = await runWithProgressBudget((budgetMs) => executeOpcodeAction({ ...opcode, timeoutMs: Math.max(1, Math.round(budgetMs)) }, index, adapter, artifacts, telemetry, currentVariant, executionState, artifactPlan, mockDataGroups, options, credentials), { startedAtMs: startTime, globalDeadlineMs, minBudgetMs: effectiveTimeoutMs, getProgress });
+            result = waited.result ?? {
+                success: false,
+                error: waited.cut === 'stuck'
+                    ? `WAIT_FOR cut: page stuck (no progress for ${Math.round(waited.waitedMs)}ms)`
+                    : `WAIT_FOR cut: global deadline reached, page never settled`,
+            };
+        }
+        else {
+            result = await withTimeout(() => executeOpcodeAction(opcode, index, adapter, artifacts, telemetry, currentVariant, executionState, artifactPlan, mockDataGroups, options, credentials), actionBudgetMs);
+        }
         logger.debug(`[opcode ${index}] action exec end — took ${Date.now() - actionStart}ms, success=${result.success}${result.error ? `, error=${result.error}` : ''}`);
         if (preTiming) {
             const keystrokeOffsetsMs = result.keystrokeTimestampsMs && result.keystrokeTimestampsMs.length > 0
@@ -358,39 +399,46 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
             const reason = result.error ?? 'action failed';
             if (isSoft)
                 return softSkipResult(opcode, index, startTime, reason, telemetry);
-            return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
+            return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
         }
-        // Verify postcondition
-        const postconditionBudgetMs = getRemainingTimeMs(deadlineMs);
+        // Verify postcondition — extend-on-progress up to the global deadline so a
+        // slow action no longer starves it (failure mode #3: clamped to ~1ms).
+        const postconditionBudgetMs = getRemainingTimeMs(globalDeadlineMs);
         if (postconditionBudgetMs <= 0) {
             const reason = `timeout after ${effectiveTimeoutMs}ms`;
             logger.debug(`[opcode ${index}] no budget left for postcondition check`);
             if (isSoft)
                 return softSkipResult(opcode, index, startTime, reason, telemetry);
-            return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
+            return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
         }
         const runtimePostcondition = resolveRuntimePostcondition(opcode);
         const postStart = Date.now();
-        const postcondition = await evaluatePostcondition(adapter, withClampedPostconditionTimeout(runtimePostcondition, postconditionBudgetMs));
+        const postcondition = await evaluatePostconditionWithProgress(adapter, runtimePostcondition, postStart, globalDeadlineMs, getProgress);
         logger.debug(`[opcode ${index}] postcondition (${runtimePostcondition.type}) took ${Date.now() - postStart}ms — passed=${postcondition.passed}, reason="${postcondition.reason}"`);
+        if (postcondition.lowConfidence) {
+            recordLowConfidence(executionState, `postcondition ${runtimePostcondition.type}: ${postcondition.reason}`);
+        }
         if (!postcondition.passed) {
             const reason = `postcondition failed: ${postcondition.reason}`;
             if (isSoft)
                 return softSkipResult(opcode, index, startTime, reason, telemetry);
-            return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
+            return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
         }
         // Verify action effects through the shared policy. Weak `any_change`
         // postconditions are only meaningful if this verifier observes a real
         // URL/tree/state/scroll change.
         if (actionEffectPolicy.captureBefore) {
             const verification = await verifier.verifyAfterAction(adapter);
+            if (verification.lowConfidence) {
+                recordLowConfidence(executionState, `action effect unverified: ${verification.summary}`);
+            }
             if (!verification.hadEffect && actionEffectPolicy.requireEffect) {
                 if (opcode.kind === 'PRESS_KEY' && actionEffectPolicy.noEffectMode === 'allow') {
                     logger.debug(`[opcode ${index}] PRESS_KEY had no DOM effect (${verification.summary}) — ` +
                         `postcondition passed, treating as redundant-but-successful`);
                 }
                 else {
-                    return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, `action had no effect: ${verification.summary}`);
+                    return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, `action had no effect: ${verification.summary}`);
                 }
             }
         }
@@ -424,7 +472,7 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
         const errorMsg = err instanceof Error ? err.message : String(err);
         if (isSoft)
             return softSkipResult(opcode, index, startTime, errorMsg, telemetry);
-        return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, errorMsg);
+        return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, errorMsg);
     }
 }
 /** Post-action breathing room (ms) injected between visible interactions
@@ -450,8 +498,9 @@ function sleep(ms) {
     return new Promise((resolve) => setTimeout(resolve, ms));
 }
 // ── Failure handling with recovery ──────────────────────────────────
-async function handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, errorMsg) {
+async function handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, errorMsg) {
     const actionEffectPolicy = getOpcodeActionEffectPolicy(opcode);
+    const getProgress = makeProgressGetter(adapter);
     const breakerState = breaker.recordFailure(index, opcode.maxFailures);
     if (breakerState.tripped) {
         telemetry.circuitBreakerTrips++;
@@ -464,7 +513,11 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
             error: `${errorMsg} (circuit breaker: ${breakerState.reason})`,
         };
     }
-    const remainingTimeMs = getRemainingTimeMs(deadlineMs);
+    // AUT-240 (Phase 5): recovery budgets to the GLOBAL deadline, not the narrow
+    // compiled one. Otherwise an adaptive WAIT_FOR that consumed its compiled
+    // budget would leave zero for recovery (failure mode #1) — and selector-repair
+    // / healer strategies that could actually fix the failure would never run.
+    const remainingTimeMs = getRemainingTimeMs(globalDeadlineMs);
     if (remainingTimeMs <= 0) {
         return {
             opcodeIndex: index,
@@ -490,6 +543,8 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
     }
     const recovery = await recoveryChain.attempt(opcode, index, adapter, {
         remainingTimeMs,
+        globalDeadlineMs,
+        getProgress,
         maxDeterministicRetries: Math.max(0, opcode.maxFailures - breakerState.opcodeFailures),
         currentVariant,
         allowPageReload: !executionState.activeClip,
@@ -511,7 +566,7 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
         if (recovery.patch) {
             healerPatches.push(recovery.patch);
         }
-        const postconditionBudgetMs = getRemainingTimeMs(deadlineMs);
+        const postconditionBudgetMs = getRemainingTimeMs(globalDeadlineMs);
         if (postconditionBudgetMs <= 0) {
             return {
                 opcodeIndex: index,
@@ -523,7 +578,10 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
             };
         }
         const runtimePostcondition = resolveRuntimePostcondition(opcode);
-        const postcondition = await evaluatePostcondition(adapter, withClampedPostconditionTimeout(runtimePostcondition, postconditionBudgetMs));
+        const postcondition = await evaluatePostconditionWithProgress(adapter, runtimePostcondition, Date.now(), globalDeadlineMs, getProgress);
+        if (postcondition.lowConfidence) {
+            recordLowConfidence(executionState, `postcondition ${runtimePostcondition.type}: ${postcondition.reason}`);
+        }
         if (!postcondition.passed) {
             return {
                 opcodeIndex: index,
@@ -536,6 +594,9 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
         }
         if (actionEffectPolicy.captureBefore) {
             const verification = await verifier.verifyAfterAction(adapter);
+            if (verification.lowConfidence) {
+                recordLowConfidence(executionState, `action effect unverified: ${verification.summary}`);
+            }
             if (!verification.hadEffect && actionEffectPolicy.requireEffect) {
                 return {
                     opcodeIndex: index,
@@ -611,13 +672,34 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
                 assertSurfacePostconditionSource(opcode);
                 return evaluateSurfaceAssertion(adapter, opcode.selectors, opcode.matchAll);
             case 'CAPTURE_SCREENSHOT': {
-                const stability = await smartWaitForStability(adapter, { maxWaitMs: 5000 });
+                // AUT-240 (Layer B): stabilize visually before capture but NEVER fail the
+                // capture on it. Prefer the adapter's adaptive stabilizer (fonts/images/
+                // semantic loaders/DOM-quiet + bounded pixel fallback); fall back to the
+                // legacy smart-wait for adapters that don't implement it. A page that
+                // never fully settles (e.g. a perpetual animation) is captured anyway.
+                const stabilize = (maxWaitMs) => adapter.waitForVisuallyStable
+                    ? adapter.waitForVisuallyStable({ maxWaitMs })
+                    : smartWaitForStability(adapter, { maxWaitMs }).then((r) => ({
+                        stable: r.stable,
+                        reason: r.waitedFor.join(', ') || 'unknown',
+                    }));
+                const stability = await stabilize(5000);
+                if (!stability.stable) {
+                    logger.debug(`[opcode ${opcodeIndex}] capturing despite unstable page: ${stability.reason}`);
+                }
+                // AUT-240 (Layer 4): flag the capture low-confidence if a possible false pass ("faux-vert") was
+                // assumed-OK earlier in this variant, or if the page never reached a
+                // visually-stable state before this shot. "Assume OK, but flag it." Keep
+                // both reasons when both apply (don't let one mask the other).
+                const captureLowConfidence = Boolean(executionState.lowConfidence) || !stability.stable;
+                const lowConfidenceReasons = [];
+                if (executionState.lowConfidence && executionState.lowConfidenceReason) {
+                    lowConfidenceReasons.push(executionState.lowConfidenceReason);
+                }
                 if (!stability.stable) {
-                    return {
-                        success: false,
-                        error: `page not stable before screenshot; unresolved loaders: ${stability.waitedFor.join(', ') || 'unknown'}`,
-                    };
+                    lowConfidenceReasons.push(`captured before visual stability: ${stability.reason}`);
                 }
+                const captureLowConfidenceReason = lowConfidenceReasons.join('; ') || undefined;
                 const captureUrl = await adapter.getCurrentUrl();
                 const takeBuffer = async () => {
                     if (opcode.elementSelector && adapter.takeElementScreenshot) {
@@ -648,7 +730,7 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
                         });
                     }
                     if (!verification.passed) {
-                        await smartWaitForStability(adapter, { maxWaitMs: 8000 });
+                        await stabilize(8000);
                         const retryBuffer = await takeBuffer();
                         const retryVerification = await verifyCaptureQuality(retryBuffer, {
                             expectedDescription: opcode.description,
@@ -733,6 +815,8 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
                     variantId: currentVariant?.id,
                     tabIconData,
                     tabIconMimeType,
+                    lowConfidence: captureLowConfidence || undefined,
+                    lowConfidenceReason: captureLowConfidence ? captureLowConfidenceReason : undefined,
                 });
                 break;
             }
@@ -781,6 +865,8 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
                     stepDescription: opcode.description,
                     stepIndex: opcodeIndex,
                     variantId: currentVariant?.id,
+                    // AUT-241 — full-load / unexpected-nav warnings seen during this take.
+                    warnings: recording.warnings,
                 });
                 break;
             }
@@ -888,11 +974,16 @@ function resolveClipIdentity(activeClip, opcode) {
         clipName: opcode.clipName ?? activeClip?.clipName ?? opcode.description,
     };
 }
-function withClampedPostconditionTimeout(spec, maxWaitMs) {
-    return {
-        ...spec,
-        waitMs: Math.max(1, Math.min(spec.waitMs ?? maxWaitMs, maxWaitMs)),
-    };
+/**
+ * Bind the adapter's optional progress probe for the watchdog, or return
+ * undefined when the adapter has none (graceful degradation: waits then fall
+ * back to fixed compiled budgets). See `runWithProgressBudget`.
+ */
+function makeProgressGetter(adapter) {
+    if (!adapter.getProgressSnapshot)
+        return undefined;
+    const getSnapshot = adapter.getProgressSnapshot.bind(adapter);
+    return () => getSnapshot();
 }
 function evaluateImmediateAssertion(result, prefix) {
     return result.passed

package/dist/postcondition.d.ts CHANGED Viewed

@@ -4,13 +4,28 @@
  * Deterministic evaluation of postconditions after each opcode.
  * No LLM calls — purely structural checks against AKTree, URL, and screenshots.
  */
-import type { RuntimeAdapter, PostconditionSpec } from './execution-types.js';
+import type { RuntimeAdapter, PostconditionSpec, ProgressSnapshot } from './execution-types.js';
 /**
  * Evaluates whether a postcondition holds.
  * Retries internally up to postcondition.waitMs (polling).
  * Resolves to a result whose `passed` flag is true if the condition is satisfied, false otherwise (with a `reason`).
  */
-export declare function evaluatePostcondition(adapter: RuntimeAdapter, spec: PostconditionSpec): Promise<{
+export interface PostconditionResult {
     passed: boolean;
     reason: string;
-}>;
+    /**
+     * AUT-240 (decision 2): the check could not be verified deterministically
+     * (an AKTree probe kept throwing) and was assumed-OK as a last resort. The
+     * capture is flagged low-confidence rather than failed.
+     */
+    lowConfidence?: boolean;
+}
+export declare function evaluatePostcondition(adapter: RuntimeAdapter, spec: PostconditionSpec): Promise<PostconditionResult>;
+/**
+ * Evaluate a postcondition with extend-on-progress (AUT-240, Layer C): the poll
+ * gets a generous budget up to the global deadline and the progress watchdog
+ * cuts it only when the page is genuinely stuck. Replaces the old clamp-to-
+ * remaining-budget that could starve the check to ~1ms after a slow action.
+ * Shared by the runner (main path) and the recovery chain (retry re-check).
+ */
+export declare function evaluatePostconditionWithProgress(adapter: RuntimeAdapter, spec: PostconditionSpec, startedAtMs: number, globalDeadlineMs: number, getProgress: (() => Promise<ProgressSnapshot | null>) | undefined): Promise<PostconditionResult>;

package/dist/postcondition.js CHANGED Viewed

@@ -4,12 +4,7 @@
  * Deterministic evaluation of postconditions after each opcode.
  * No LLM calls — purely structural checks against AKTree, URL, and screenshots.
  */
-import { serializeAKTree } from './ak-tree.js';
-/**
- * Evaluates whether a postcondition holds.
- * Retries internally up to postcondition.waitMs (polling).
- * Returns true if the condition is satisfied, false otherwise.
- */
+import { runWithProgressBudget } from './wait-contract.js';
 export async function evaluatePostcondition(adapter, spec) {
     const maxWait = spec.waitMs ?? 5000;
     const pollInterval = 500;
@@ -31,6 +26,29 @@ export async function evaluatePostcondition(adapter, spec) {
     // Final check after timeout
     return checkOnce(adapter, spec, context);
 }
+/**
+ * Evaluate a postcondition with extend-on-progress (AUT-240, Layer C): the poll
+ * gets a generous budget up to the global deadline and the progress watchdog
+ * cuts it only when the page is genuinely stuck. Replaces the old clamp-to-
+ * remaining-budget that could starve the check to ~1ms after a slow action.
+ * Shared by the runner (main path) and the recovery chain (retry re-check).
+ */
+export async function evaluatePostconditionWithProgress(adapter, spec, startedAtMs, globalDeadlineMs, getProgress) {
+    // Immediate specs need no adaptive budget.
+    if (spec.type === 'always') {
+        return evaluatePostcondition(adapter, spec);
+    }
+    const compiledWaitMs = spec.waitMs ?? 5000;
+    const waited = await runWithProgressBudget((budgetMs) => evaluatePostcondition(adapter, { ...spec, waitMs: Math.max(1, Math.round(budgetMs)) }), { startedAtMs, globalDeadlineMs, minBudgetMs: compiledWaitMs, getProgress });
+    if (waited.result)
+        return waited.result;
+    return {
+        passed: false,
+        reason: waited.cut === 'stuck'
+            ? `not met (page stuck, no progress for ${Math.round(waited.waitedMs)}ms)`
+            : 'not met (global wait deadline reached)',
+    };
+}
 async function checkOnce(adapter, spec, context) {
     switch (spec.type) {
         case 'route_matches':
@@ -117,16 +135,15 @@ async function checkElementVisible(adapter, selector) {
     catch {
         // Fall through to AKTree check
     }
-    // Fallback: check AKTree
+    // Fallback: a visible node matching the selector in the AKTree.
+    // (AUT-240, Layer A: the old `serializeAKTree().includes(selector)` fallback
+    // was dropped — a substring match on the serialized tree produced false
+    // positives.)
     try {
         const tree = await adapter.getAKTree();
         if (hasVisibleNodeWithSelector(tree, selector)) {
             return { passed: true, reason: `element "${selector}" is visible in AKTree` };
         }
-        const serialized = serializeAKTree(tree);
-        if (serialized.includes(selector.replace(/[[\]"]/g, ''))) {
-            return { passed: true, reason: `element pattern "${selector}" found in serialized AKTree` };
-        }
         return { passed: false, reason: `element "${selector}" not visible` };
     }
     catch {
@@ -147,6 +164,23 @@ async function checkElementAbsent(adapter, selector) {
     }
 }
 async function checkTextContains(adapter, selector, expectedText) {
+    const expected = normalizeText(expectedText);
+    // Playwright-first (AUT-240, Layer A): read the live DOM text.
+    if (adapter.getTextContent) {
+        try {
+            const live = await adapter.getTextContent(selector);
+            if (live !== null && normalizeText(live).includes(expected)) {
+                return { passed: true, reason: `element "${selector}" contains "${expectedText}" (Playwright)` };
+            }
+            // Element found but text didn't match (or selector missed): fall through
+            // to the AKTree, which may surface label/value/aria text the raw
+            // textContent omits.
+        }
+        catch {
+            // Fall through to AKTree.
+        }
+    }
+    // Fallback: AKTree (label / value / own text).
     try {
         const tree = await adapter.getAKTree();
         const node = findNodeBySelector(tree, selector);
@@ -158,9 +192,8 @@ async function checkTextContains(adapter, selector, expectedText) {
             node.value || '',
             node.attributes.__ownText || '',
         ].join(' '));
-        const expected = normalizeText(expectedText);
         if (nodeText.includes(expected)) {
-            return { passed: true, reason: `element "${selector}" contains "${expectedText}"` };
+            return { passed: true, reason: `element "${selector}" contains "${expectedText}" (AKTree)` };
         }
         return { passed: false, reason: `element "${selector}" text "${nodeText}" does not contain "${expectedText}"` };
     }
@@ -168,24 +201,39 @@ async function checkTextContains(adapter, selector, expectedText) {
         return { passed: false, reason: `error checking text: ${err}` };
     }
 }
+function evaluateOverlayTree(tree) {
+    if (tree.overlays.length === 0) {
+        return { passed: true, reason: 'no overlays detected' };
+    }
+    const blocking = tree.overlays.filter(o => o.blocksInteraction);
+    if (blocking.length === 0) {
+        return { passed: true, reason: 'overlays present but none blocking interaction' };
+    }
+    return { passed: false, reason: `${blocking.length} blocking overlay(s) still present` };
+}
 async function checkOverlayDismissed(adapter) {
     try {
-        const tree = await adapter.getAKTree();
-        // Check if any overlays are reported in the tree
-        if (tree.overlays.length === 0) {
-            return { passed: true, reason: 'no overlays detected' };
-        }
-        // Check if remaining overlays are blocking
-        const blocking = tree.overlays.filter(o => o.blocksInteraction);
-        if (blocking.length === 0) {
-            return { passed: true, reason: 'overlays present but none blocking interaction' };
-        }
-        return { passed: false, reason: `${blocking.length} blocking overlay(s) still present` };
+        return evaluateOverlayTree(await adapter.getAKTree());
     }
     catch {
-        // If AKTree is unavailable (e.g. page.evaluate failure), assume overlays are dismissed.
-        // The overlay dismissal itself ran; we just can't verify via AKTree.
-        return { passed: true, reason: 'overlay check skipped (AKTree unavailable), assuming dismissed' };
+        // AUT-240 (decision 2): "assume OK, but smart". A first `page.evaluate`
+        // hiccup (e.g. navigation in flight) is no longer assumed-OK immediately —
+        // settle the page and retry the AKTree once.
+        try {
+            if (adapter.waitForVisuallyStable) {
+                await adapter.waitForVisuallyStable({ maxWaitMs: 2000 });
+            }
+            return evaluateOverlayTree(await adapter.getAKTree());
+        }
+        catch {
+            // Still unverifiable: assume dismissed as a last resort, but flag
+            // low-confidence so the post-capture verification scrutinizes it.
+            return {
+                passed: true,
+                reason: 'overlay check unverifiable after settle; assuming dismissed (low-confidence)',
+                lowConfidence: true,
+            };
+        }
     }
 }
 async function checkScreenshotStable(adapter, threshold, context) {

package/dist/program-hash.d.ts ADDED Viewed

@@ -0,0 +1,11 @@
+/**
+ * Capture Agent — Program content hashing (run provenance)
+ *
+ * Stable content hash of an ExecutionProgram, persisted as `program_hash` on
+ * each run so a screenshot can be traced back to the exact program bytes that
+ * produced it. Isolated from program-migrations.ts to keep `node:crypto` out of
+ * the schema validation import chain.
+ */
+import type { ExecutionProgram } from './execution-types.js';
+/** sha256 of the canonicalized program (stable across runs of the same program). */
+export declare function hashProgram(program: ExecutionProgram): string;

package/dist/program-hash.js ADDED Viewed

@@ -0,0 +1,28 @@
+/**
+ * Capture Agent — Program content hashing (run provenance)
+ *
+ * Stable content hash of an ExecutionProgram, persisted as `program_hash` on
+ * each run so a screenshot can be traced back to the exact program bytes that
+ * produced it. Isolated from program-migrations.ts to keep `node:crypto` out of
+ * the schema validation import chain.
+ */
+import { createHash } from 'node:crypto';
+/** Deterministic JSON serialization with object keys sorted recursively. */
+function stableStringify(value) {
+    if (value === undefined)
+        return 'null';
+    if (value === null || typeof value !== 'object')
+        return JSON.stringify(value);
+    if (Array.isArray(value))
+        return `[${value.map(stableStringify).join(',')}]`;
+    const obj = value;
+    const entries = Object.keys(obj)
+        .sort()
+        .map((k) => `${JSON.stringify(k)}:${stableStringify(obj[k])}`);
+    return `{${entries.join(',')}}`;
+}
+/** sha256 of the canonicalized program (stable across runs of the same program). */
+export function hashProgram(program) {
+    return createHash('sha256').update(stableStringify(program)).digest('hex');
+}
+//# sourceMappingURL=program-hash.js.map

package/dist/program-migrations.d.ts ADDED Viewed

@@ -0,0 +1,31 @@
+/**
+ * Capture Agent — Program FORM migrations (migrate-on-read)
+ *
+ * Old presets are stored at whatever `programSchemaVersion` (FORM) was current
+ * when they were authored. `upgradeProgram` runs a chain of pure
+ * `migrate_vN→vN+1` functions to bring any stored program up to the current
+ * form BEFORE strict schema validation, so the runner only ever sees one shape.
+ *
+ * Properties of this layer (decisions locked in AUT-242):
+ * - Compat forever: the chain is kept indefinitely; no support window.
+ * - Migrate-on-read only: programs are NEVER rewritten back to storage. The
+ *   stored form changes only when the generator recompiles (create/modify).
+ * - Pure + idempotent: a program already at the current form is a no-op.
+ *
+ * This module is intentionally free of Node-only imports so it can be pulled
+ * into the schema validation chain on any runtime. Content hashing
+ * (`node:crypto`) lives in program-hash.ts.
+ */
+/**
+ * Reads the FORM version a raw (pre-migration) program was stored at.
+ * Absent / non-finite ⇒ 0 (the oldest form). Used to stamp run provenance
+ * (`program_schema_version_origin`) before `upgradeProgram` bumps it.
+ */
+export declare function readOriginSchemaVersion(raw: unknown): number;
+/**
+ * Brings any stored program up to {@link CURRENT_PROGRAM_SCHEMA_VERSION} (form)
+ * before strict validation. Pure: clones, never mutates `raw`. Idempotent: a
+ * program already at the current form is returned with only its version stamped.
+ * Non-object input is returned untouched so the schema raises a clean error.
+ */
+export declare function upgradeProgram(raw: unknown): unknown;