autokap 1.8.6 → 1.8.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/dist/action-verifier.d.ts +6 -0
  2. package/dist/action-verifier.js +30 -17
  3. package/dist/browser.d.ts +59 -0
  4. package/dist/browser.js +259 -0
  5. package/dist/cli-config.js +7 -12
  6. package/dist/cli-contract.d.ts +5 -9
  7. package/dist/cli-contract.js +11 -38
  8. package/dist/cli-runner.d.ts +0 -1
  9. package/dist/cli-runner.js +74 -59
  10. package/dist/cli.js +7 -7
  11. package/dist/clip-capture-loop.d.ts +28 -7
  12. package/dist/clip-capture-loop.js +102 -19
  13. package/dist/engine-version.d.ts +24 -0
  14. package/dist/engine-version.js +25 -0
  15. package/dist/execution-schema.d.ts +22 -0
  16. package/dist/execution-schema.js +59 -8
  17. package/dist/execution-types.d.ts +116 -0
  18. package/dist/opcode-runner.d.ts +8 -1
  19. package/dist/opcode-runner.js +120 -29
  20. package/dist/postcondition.d.ts +18 -3
  21. package/dist/postcondition.js +75 -27
  22. package/dist/program-hash.d.ts +11 -0
  23. package/dist/program-hash.js +28 -0
  24. package/dist/program-migrations.d.ts +31 -0
  25. package/dist/program-migrations.js +93 -0
  26. package/dist/program-signing.d.ts +11 -0
  27. package/dist/program-signing.js +1 -0
  28. package/dist/recovery-chain.js +8 -11
  29. package/dist/scenario-cookie.d.ts +36 -0
  30. package/dist/scenario-cookie.js +62 -0
  31. package/dist/security.d.ts +21 -0
  32. package/dist/security.js +46 -8
  33. package/dist/server-credit-usage.d.ts +1 -1
  34. package/dist/version.d.ts +1 -0
  35. package/dist/version.js +1 -0
  36. package/dist/video-narration-schema.d.ts +3 -0
  37. package/dist/video-narration-schema.js +3 -0
  38. package/dist/wait-contract.d.ts +104 -0
  39. package/dist/wait-contract.js +144 -0
  40. package/dist/web-playwright-local.d.ts +9 -1
  41. package/dist/web-playwright-local.js +0 -0
  42. package/package.json +2 -2
  43. package/readme.md +9 -15
@@ -6,7 +6,8 @@
6
6
  * delegates to recovery chain on failure, and respects circuit breaker.
7
7
  */
8
8
  import { isSoftOpcodeKind } from './execution-types.js';
9
- import { evaluatePostcondition } from './postcondition.js';
9
+ import { evaluatePostcondition, evaluatePostconditionWithProgress } from './postcondition.js';
10
+ import { WAIT_CONTRACT_VERSION, resolveGlobalWaitDeadlineMs, runWithProgressBudget, } from './wait-contract.js';
10
11
  import { ActionVerifier } from './action-verifier.js';
11
12
  import { CircuitBreaker } from './circuit-breaker.js';
12
13
  import { smartWaitForStability } from './smart-wait.js';
@@ -98,6 +99,14 @@ function resolveRuntimePostcondition(opcode) {
98
99
  }
99
100
  return opcode.postcondition;
100
101
  }
102
+ /** Mark the variant low-confidence once (keeps the first reason). */
103
+ function recordLowConfidence(state, reason) {
104
+ if (state.lowConfidence)
105
+ return;
106
+ state.lowConfidence = true;
107
+ state.lowConfidenceReason = reason;
108
+ logger.debug(`[run] low-confidence flagged: ${reason}`);
109
+ }
101
110
  // ── Main execution function ─────────────────────────────────────────
102
111
  export async function executeProgram(program, createAdapter, options = {}) {
103
112
  const recoveryChain = options.recoveryChain ?? new NoOpRecoveryChain();
@@ -117,6 +126,7 @@ export async function executeProgram(program, createAdapter, options = {}) {
117
126
  healerInvocations: 0,
118
127
  circuitBreakerTrips: 0,
119
128
  };
129
+ logger.debug(`[run] wait contract v${WAIT_CONTRACT_VERSION} (adaptive budgets + visual stability)`);
120
130
  let nextVariantIndex = 0;
121
131
  const workerCount = Math.min(maxParallelVariants, program.variants.length);
122
132
  const workers = Array.from({ length: workerCount }, async () => {
@@ -148,6 +158,9 @@ export async function executeProgram(program, createAdapter, options = {}) {
148
158
  const aborted = options.abortSignal?.aborted && completedVariantResults.length < program.variants.length;
149
159
  const success = !aborted && completedVariantResults.length > 0 && completedVariantResults.every(v => v.success);
150
160
  const detectedAppVersion = completedVariantResults.reduce((acc, variantResult) => acc ?? (variantResult.detectedAppVersion ?? null), null);
161
+ // AUT-241 — surface (don't mask) cuts: aggregate every recording warning from
162
+ // each variant's clip/video artifacts. Diagnostic only; never affects success.
163
+ const aggregatedWarnings = completedVariantResults.flatMap((v) => v.artifacts.flatMap((a) => a.warnings ?? []));
151
164
  return {
152
165
  programId: program.presetId,
153
166
  success,
@@ -157,6 +170,7 @@ export async function executeProgram(program, createAdapter, options = {}) {
157
170
  opcodeTimings,
158
171
  totalDurationMs: Date.now() - startTime,
159
172
  detectedAppVersion,
173
+ warnings: aggregatedWarnings.length ? aggregatedWarnings : undefined,
160
174
  error: aborted ? 'aborted' : (success ? undefined : completedVariantResults.find(v => !v.success)?.error),
161
175
  };
162
176
  }
@@ -302,6 +316,13 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
302
316
  const startTime = Date.now();
303
317
  const effectiveTimeoutMs = resolveOpcodeTimeoutMs(opcode);
304
318
  const deadlineMs = startTime + effectiveTimeoutMs;
319
+ // AUT-240 (Layer C): a hard, per-media global deadline for adaptive waits,
320
+ // separate from the compiled per-opcode timeout. The compiled timeout acts as
321
+ // a FLOOR (never a ceiling), so `WAIT_FOR` and postconditions can extend while
322
+ // the page is still progressing, up to this deadline. Interactions stay on the
323
+ // narrow `deadlineMs` (Playwright auto-waiting handles those).
324
+ const globalDeadlineMs = resolveGlobalWaitDeadlineMs(startTime, effectiveTimeoutMs, artifactPlan.mediaMode);
325
+ const getProgress = makeProgressGetter(adapter);
305
326
  const actionEffectPolicy = getOpcodeActionEffectPolicy(opcode);
306
327
  const isSoft = isSoftOpcodeKind(opcode.kind);
307
328
  // Track page context for circuit breaker
@@ -318,21 +339,41 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
318
339
  await verifier.captureBeforeState(adapter);
319
340
  logger.debug(`[opcode ${index}] captureBeforeState took ${Date.now() - beforeStart}ms`);
320
341
  }
321
- const actionBudgetMs = getRemainingTimeMs(deadlineMs);
342
+ // `WAIT_FOR` is a pure wait: it extends while the page is progressing, up to
343
+ // the global deadline. All other opcodes are interactions bounded by the
344
+ // narrow per-opcode deadline (Playwright auto-waiting covers them).
345
+ const isPureWait = opcode.kind === 'WAIT_FOR';
346
+ const actionDeadlineMs = isPureWait ? globalDeadlineMs : deadlineMs;
347
+ const actionBudgetMs = getRemainingTimeMs(actionDeadlineMs);
322
348
  if (actionBudgetMs <= 0) {
323
349
  const reason = `timeout after ${effectiveTimeoutMs}ms`;
324
- logger.debug(`[opcode ${index}] no budget left after captureBeforeState (deadline=${deadlineMs}, now=${Date.now()})`);
350
+ logger.debug(`[opcode ${index}] no budget left after captureBeforeState (deadline=${actionDeadlineMs}, now=${Date.now()})`);
325
351
  if (isSoft)
326
352
  return softSkipResult(opcode, index, startTime, reason, telemetry);
327
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
353
+ return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
328
354
  }
329
355
  // For mediaMode='video', capture pre-action timing + bbox metadata inside
330
356
  // the active clip window only. Opcodes outside a clip are not part of the
331
357
  // video output.
332
358
  const preTiming = await capturePreActionTiming(opcode, adapter, executionState.activeClip, artifactPlan);
333
- logger.debug(`[opcode ${index}] action exec start — actionBudget ${actionBudgetMs}ms`);
359
+ logger.debug(`[opcode ${index}] action exec start — actionBudget ${actionBudgetMs}ms${isPureWait ? ' (adaptive)' : ''}`);
334
360
  const actionStart = Date.now();
335
- const result = await withTimeout(() => executeOpcodeAction(opcode, index, adapter, artifacts, telemetry, currentVariant, executionState, artifactPlan, mockDataGroups, options, credentials), actionBudgetMs);
361
+ let result;
362
+ if (opcode.kind === 'WAIT_FOR' && getProgress) {
363
+ // Extend-on-progress: give the wait a generous budget (up to the global
364
+ // deadline) and let the watchdog cut it early only when the page is truly
365
+ // stuck. A slow-but-progressing page no longer trips a fixed timeout.
366
+ const waited = await runWithProgressBudget((budgetMs) => executeOpcodeAction({ ...opcode, timeoutMs: Math.max(1, Math.round(budgetMs)) }, index, adapter, artifacts, telemetry, currentVariant, executionState, artifactPlan, mockDataGroups, options, credentials), { startedAtMs: startTime, globalDeadlineMs, minBudgetMs: effectiveTimeoutMs, getProgress });
367
+ result = waited.result ?? {
368
+ success: false,
369
+ error: waited.cut === 'stuck'
370
+ ? `WAIT_FOR cut: page stuck (no progress for ${Math.round(waited.waitedMs)}ms)`
371
+ : `WAIT_FOR cut: global deadline reached, page never settled`,
372
+ };
373
+ }
374
+ else {
375
+ result = await withTimeout(() => executeOpcodeAction(opcode, index, adapter, artifacts, telemetry, currentVariant, executionState, artifactPlan, mockDataGroups, options, credentials), actionBudgetMs);
376
+ }
336
377
  logger.debug(`[opcode ${index}] action exec end — took ${Date.now() - actionStart}ms, success=${result.success}${result.error ? `, error=${result.error}` : ''}`);
337
378
  if (preTiming) {
338
379
  const keystrokeOffsetsMs = result.keystrokeTimestampsMs && result.keystrokeTimestampsMs.length > 0
@@ -358,39 +399,46 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
358
399
  const reason = result.error ?? 'action failed';
359
400
  if (isSoft)
360
401
  return softSkipResult(opcode, index, startTime, reason, telemetry);
361
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
402
+ return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
362
403
  }
363
- // Verify postcondition
364
- const postconditionBudgetMs = getRemainingTimeMs(deadlineMs);
404
+ // Verify postcondition — extend-on-progress up to the global deadline so a
405
+ // slow action no longer starves it (failure mode #3: clamped to ~1ms).
406
+ const postconditionBudgetMs = getRemainingTimeMs(globalDeadlineMs);
365
407
  if (postconditionBudgetMs <= 0) {
366
408
  const reason = `timeout after ${effectiveTimeoutMs}ms`;
367
409
  logger.debug(`[opcode ${index}] no budget left for postcondition check`);
368
410
  if (isSoft)
369
411
  return softSkipResult(opcode, index, startTime, reason, telemetry);
370
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
412
+ return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
371
413
  }
372
414
  const runtimePostcondition = resolveRuntimePostcondition(opcode);
373
415
  const postStart = Date.now();
374
- const postcondition = await evaluatePostcondition(adapter, withClampedPostconditionTimeout(runtimePostcondition, postconditionBudgetMs));
416
+ const postcondition = await evaluatePostconditionWithProgress(adapter, runtimePostcondition, postStart, globalDeadlineMs, getProgress);
375
417
  logger.debug(`[opcode ${index}] postcondition (${runtimePostcondition.type}) took ${Date.now() - postStart}ms — passed=${postcondition.passed}, reason="${postcondition.reason}"`);
418
+ if (postcondition.lowConfidence) {
419
+ recordLowConfidence(executionState, `postcondition ${runtimePostcondition.type}: ${postcondition.reason}`);
420
+ }
376
421
  if (!postcondition.passed) {
377
422
  const reason = `postcondition failed: ${postcondition.reason}`;
378
423
  if (isSoft)
379
424
  return softSkipResult(opcode, index, startTime, reason, telemetry);
380
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
425
+ return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
381
426
  }
382
427
  // Verify action effects through the shared policy. Weak `any_change`
383
428
  // postconditions are only meaningful if this verifier observes a real
384
429
  // URL/tree/state/scroll change.
385
430
  if (actionEffectPolicy.captureBefore) {
386
431
  const verification = await verifier.verifyAfterAction(adapter);
432
+ if (verification.lowConfidence) {
433
+ recordLowConfidence(executionState, `action effect unverified: ${verification.summary}`);
434
+ }
387
435
  if (!verification.hadEffect && actionEffectPolicy.requireEffect) {
388
436
  if (opcode.kind === 'PRESS_KEY' && actionEffectPolicy.noEffectMode === 'allow') {
389
437
  logger.debug(`[opcode ${index}] PRESS_KEY had no DOM effect (${verification.summary}) — ` +
390
438
  `postcondition passed, treating as redundant-but-successful`);
391
439
  }
392
440
  else {
393
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, `action had no effect: ${verification.summary}`);
441
+ return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, `action had no effect: ${verification.summary}`);
394
442
  }
395
443
  }
396
444
  }
@@ -424,7 +472,7 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
424
472
  const errorMsg = err instanceof Error ? err.message : String(err);
425
473
  if (isSoft)
426
474
  return softSkipResult(opcode, index, startTime, errorMsg, telemetry);
427
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, errorMsg);
475
+ return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, errorMsg);
428
476
  }
429
477
  }
430
478
  /** Post-action breathing room (ms) injected between visible interactions
@@ -450,8 +498,9 @@ function sleep(ms) {
450
498
  return new Promise((resolve) => setTimeout(resolve, ms));
451
499
  }
452
500
  // ── Failure handling with recovery ──────────────────────────────────
453
- async function handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, errorMsg) {
501
+ async function handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, errorMsg) {
454
502
  const actionEffectPolicy = getOpcodeActionEffectPolicy(opcode);
503
+ const getProgress = makeProgressGetter(adapter);
455
504
  const breakerState = breaker.recordFailure(index, opcode.maxFailures);
456
505
  if (breakerState.tripped) {
457
506
  telemetry.circuitBreakerTrips++;
@@ -464,7 +513,11 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
464
513
  error: `${errorMsg} (circuit breaker: ${breakerState.reason})`,
465
514
  };
466
515
  }
467
- const remainingTimeMs = getRemainingTimeMs(deadlineMs);
516
+ // AUT-240 (Phase 5): recovery budgets to the GLOBAL deadline, not the narrow
517
+ // compiled one. Otherwise an adaptive WAIT_FOR that consumed its compiled
518
+ // budget would leave zero for recovery (failure mode #1) — and selector-repair
519
+ // / healer strategies that could actually fix the failure would never run.
520
+ const remainingTimeMs = getRemainingTimeMs(globalDeadlineMs);
468
521
  if (remainingTimeMs <= 0) {
469
522
  return {
470
523
  opcodeIndex: index,
@@ -490,6 +543,8 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
490
543
  }
491
544
  const recovery = await recoveryChain.attempt(opcode, index, adapter, {
492
545
  remainingTimeMs,
546
+ globalDeadlineMs,
547
+ getProgress,
493
548
  maxDeterministicRetries: Math.max(0, opcode.maxFailures - breakerState.opcodeFailures),
494
549
  currentVariant,
495
550
  allowPageReload: !executionState.activeClip,
@@ -511,7 +566,7 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
511
566
  if (recovery.patch) {
512
567
  healerPatches.push(recovery.patch);
513
568
  }
514
- const postconditionBudgetMs = getRemainingTimeMs(deadlineMs);
569
+ const postconditionBudgetMs = getRemainingTimeMs(globalDeadlineMs);
515
570
  if (postconditionBudgetMs <= 0) {
516
571
  return {
517
572
  opcodeIndex: index,
@@ -523,7 +578,10 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
523
578
  };
524
579
  }
525
580
  const runtimePostcondition = resolveRuntimePostcondition(opcode);
526
- const postcondition = await evaluatePostcondition(adapter, withClampedPostconditionTimeout(runtimePostcondition, postconditionBudgetMs));
581
+ const postcondition = await evaluatePostconditionWithProgress(adapter, runtimePostcondition, Date.now(), globalDeadlineMs, getProgress);
582
+ if (postcondition.lowConfidence) {
583
+ recordLowConfidence(executionState, `postcondition ${runtimePostcondition.type}: ${postcondition.reason}`);
584
+ }
527
585
  if (!postcondition.passed) {
528
586
  return {
529
587
  opcodeIndex: index,
@@ -536,6 +594,9 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
536
594
  }
537
595
  if (actionEffectPolicy.captureBefore) {
538
596
  const verification = await verifier.verifyAfterAction(adapter);
597
+ if (verification.lowConfidence) {
598
+ recordLowConfidence(executionState, `action effect unverified: ${verification.summary}`);
599
+ }
539
600
  if (!verification.hadEffect && actionEffectPolicy.requireEffect) {
540
601
  return {
541
602
  opcodeIndex: index,
@@ -611,13 +672,34 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
611
672
  assertSurfacePostconditionSource(opcode);
612
673
  return evaluateSurfaceAssertion(adapter, opcode.selectors, opcode.matchAll);
613
674
  case 'CAPTURE_SCREENSHOT': {
614
- const stability = await smartWaitForStability(adapter, { maxWaitMs: 5000 });
675
+ // AUT-240 (Layer B): stabilize visually before capture but NEVER fail the
676
+ // capture on it. Prefer the adapter's adaptive stabilizer (fonts/images/
677
+ // semantic loaders/DOM-quiet + bounded pixel fallback); fall back to the
678
+ // legacy smart-wait for adapters that don't implement it. A page that
679
+ // never fully settles (e.g. a perpetual animation) is captured anyway.
680
+ const stabilize = (maxWaitMs) => adapter.waitForVisuallyStable
681
+ ? adapter.waitForVisuallyStable({ maxWaitMs })
682
+ : smartWaitForStability(adapter, { maxWaitMs }).then((r) => ({
683
+ stable: r.stable,
684
+ reason: r.waitedFor.join(', ') || 'unknown',
685
+ }));
686
+ const stability = await stabilize(5000);
687
+ if (!stability.stable) {
688
+ logger.debug(`[opcode ${opcodeIndex}] capturing despite unstable page: ${stability.reason}`);
689
+ }
690
+ // AUT-240 (Layer 4): flag the capture low-confidence if an unverified check was
691
+ // assumed-OK earlier in this variant, or if the page never reached a
692
+ // visually-stable state before this shot. "Assume OK, but flag it." Keep
693
+ // both reasons when both apply (don't let one mask the other).
694
+ const captureLowConfidence = Boolean(executionState.lowConfidence) || !stability.stable;
695
+ const lowConfidenceReasons = [];
696
+ if (executionState.lowConfidence && executionState.lowConfidenceReason) {
697
+ lowConfidenceReasons.push(executionState.lowConfidenceReason);
698
+ }
615
699
  if (!stability.stable) {
616
- return {
617
- success: false,
618
- error: `page not stable before screenshot; unresolved loaders: ${stability.waitedFor.join(', ') || 'unknown'}`,
619
- };
700
+ lowConfidenceReasons.push(`captured before visual stability: ${stability.reason}`);
620
701
  }
702
+ const captureLowConfidenceReason = lowConfidenceReasons.join('; ') || undefined;
621
703
  const captureUrl = await adapter.getCurrentUrl();
622
704
  const takeBuffer = async () => {
623
705
  if (opcode.elementSelector && adapter.takeElementScreenshot) {
@@ -648,7 +730,7 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
648
730
  });
649
731
  }
650
732
  if (!verification.passed) {
651
- await smartWaitForStability(adapter, { maxWaitMs: 8000 });
733
+ await stabilize(8000);
652
734
  const retryBuffer = await takeBuffer();
653
735
  const retryVerification = await verifyCaptureQuality(retryBuffer, {
654
736
  expectedDescription: opcode.description,
@@ -733,6 +815,8 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
733
815
  variantId: currentVariant?.id,
734
816
  tabIconData,
735
817
  tabIconMimeType,
818
+ lowConfidence: captureLowConfidence || undefined,
819
+ lowConfidenceReason: captureLowConfidence ? captureLowConfidenceReason : undefined,
736
820
  });
737
821
  break;
738
822
  }
@@ -781,6 +865,8 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
781
865
  stepDescription: opcode.description,
782
866
  stepIndex: opcodeIndex,
783
867
  variantId: currentVariant?.id,
868
+ // AUT-241 — full-load / unexpected-nav warnings seen during this take.
869
+ warnings: recording.warnings,
784
870
  });
785
871
  break;
786
872
  }
@@ -888,11 +974,16 @@ function resolveClipIdentity(activeClip, opcode) {
888
974
  clipName: opcode.clipName ?? activeClip?.clipName ?? opcode.description,
889
975
  };
890
976
  }
891
- function withClampedPostconditionTimeout(spec, maxWaitMs) {
892
- return {
893
- ...spec,
894
- waitMs: Math.max(1, Math.min(spec.waitMs ?? maxWaitMs, maxWaitMs)),
895
- };
977
+ /**
978
+ * Bind the adapter's optional progress probe for the watchdog, or return
979
+ * undefined when the adapter has none (graceful degradation: waits then fall
980
+ * back to fixed compiled budgets). See `runWithProgressBudget`.
981
+ */
982
+ function makeProgressGetter(adapter) {
983
+ if (!adapter.getProgressSnapshot)
984
+ return undefined;
985
+ const getSnapshot = adapter.getProgressSnapshot.bind(adapter);
986
+ return () => getSnapshot();
896
987
  }
897
988
  function evaluateImmediateAssertion(result, prefix) {
898
989
  return result.passed
@@ -4,13 +4,28 @@
4
4
  * Deterministic evaluation of postconditions after each opcode.
5
5
  * No LLM calls — purely structural checks against AKTree, URL, and screenshots.
6
6
  */
7
- import type { RuntimeAdapter, PostconditionSpec } from './execution-types.js';
7
+ import type { RuntimeAdapter, PostconditionSpec, ProgressSnapshot } from './execution-types.js';
8
8
  /**
9
9
  * Evaluates whether a postcondition holds.
10
10
  * Retries internally up to postcondition.waitMs (polling).
11
11
  * Returns true if the condition is satisfied, false otherwise.
12
12
  */
13
- export declare function evaluatePostcondition(adapter: RuntimeAdapter, spec: PostconditionSpec): Promise<{
13
+ export interface PostconditionResult {
14
14
  passed: boolean;
15
15
  reason: string;
16
- }>;
16
+ /**
17
+ * AUT-240 (decision 2): the check could not be verified deterministically
18
+ * (an AKTree probe kept throwing) and was assumed-OK as a last resort. The
19
+ * capture is flagged low-confidence rather than failed.
20
+ */
21
+ lowConfidence?: boolean;
22
+ }
23
+ export declare function evaluatePostcondition(adapter: RuntimeAdapter, spec: PostconditionSpec): Promise<PostconditionResult>;
24
+ /**
25
+ * Evaluate a postcondition with extend-on-progress (AUT-240, Layer C): the poll
26
+ * gets a generous budget up to the global deadline and the progress watchdog
27
+ * cuts it only when the page is genuinely stuck. Replaces the old clamp-to-
28
+ * remaining-budget that could starve the check to ~1ms after a slow action.
29
+ * Shared by the runner (main path) and the recovery chain (retry re-check).
30
+ */
31
+ export declare function evaluatePostconditionWithProgress(adapter: RuntimeAdapter, spec: PostconditionSpec, startedAtMs: number, globalDeadlineMs: number, getProgress: (() => Promise<ProgressSnapshot | null>) | undefined): Promise<PostconditionResult>;
@@ -4,12 +4,7 @@
4
4
  * Deterministic evaluation of postconditions after each opcode.
5
5
  * No LLM calls — purely structural checks against AKTree, URL, and screenshots.
6
6
  */
7
- import { serializeAKTree } from './ak-tree.js';
8
- /**
9
- * Evaluates whether a postcondition holds.
10
- * Retries internally up to postcondition.waitMs (polling).
11
- * Returns true if the condition is satisfied, false otherwise.
12
- */
7
+ import { runWithProgressBudget } from './wait-contract.js';
13
8
  export async function evaluatePostcondition(adapter, spec) {
14
9
  const maxWait = spec.waitMs ?? 5000;
15
10
  const pollInterval = 500;
@@ -31,6 +26,29 @@ export async function evaluatePostcondition(adapter, spec) {
31
26
  // Final check after timeout
32
27
  return checkOnce(adapter, spec, context);
33
28
  }
29
+ /**
30
+ * Evaluate a postcondition with extend-on-progress (AUT-240, Layer C): the poll
31
+ * gets a generous budget up to the global deadline and the progress watchdog
32
+ * cuts it only when the page is genuinely stuck. Replaces the old clamp-to-
33
+ * remaining-budget that could starve the check to ~1ms after a slow action.
34
+ * Shared by the runner (main path) and the recovery chain (retry re-check).
35
+ */
36
+ export async function evaluatePostconditionWithProgress(adapter, spec, startedAtMs, globalDeadlineMs, getProgress) {
37
+ // Immediate specs need no adaptive budget.
38
+ if (spec.type === 'always') {
39
+ return evaluatePostcondition(adapter, spec);
40
+ }
41
+ const compiledWaitMs = spec.waitMs ?? 5000;
42
+ const waited = await runWithProgressBudget((budgetMs) => evaluatePostcondition(adapter, { ...spec, waitMs: Math.max(1, Math.round(budgetMs)) }), { startedAtMs, globalDeadlineMs, minBudgetMs: compiledWaitMs, getProgress });
43
+ if (waited.result)
44
+ return waited.result;
45
+ return {
46
+ passed: false,
47
+ reason: waited.cut === 'stuck'
48
+ ? `not met (page stuck, no progress for ${Math.round(waited.waitedMs)}ms)`
49
+ : 'not met (global wait deadline reached)',
50
+ };
51
+ }
34
52
  async function checkOnce(adapter, spec, context) {
35
53
  switch (spec.type) {
36
54
  case 'route_matches':
@@ -117,16 +135,15 @@ async function checkElementVisible(adapter, selector) {
117
135
  catch {
118
136
  // Fall through to AKTree check
119
137
  }
120
- // Fallback: check AKTree
138
+ // Fallback: a visible node matching the selector in the AKTree.
139
+ // (AUT-240, Layer A: the old `serializeAKTree().includes(selector)` fallback
140
+ // was dropped — a substring match on the serialized tree produced false
141
+ // positives.)
121
142
  try {
122
143
  const tree = await adapter.getAKTree();
123
144
  if (hasVisibleNodeWithSelector(tree, selector)) {
124
145
  return { passed: true, reason: `element "${selector}" is visible in AKTree` };
125
146
  }
126
- const serialized = serializeAKTree(tree);
127
- if (serialized.includes(selector.replace(/[[\]"]/g, ''))) {
128
- return { passed: true, reason: `element pattern "${selector}" found in serialized AKTree` };
129
- }
130
147
  return { passed: false, reason: `element "${selector}" not visible` };
131
148
  }
132
149
  catch {
@@ -147,6 +164,23 @@ async function checkElementAbsent(adapter, selector) {
147
164
  }
148
165
  }
149
166
  async function checkTextContains(adapter, selector, expectedText) {
167
+ const expected = normalizeText(expectedText);
168
+ // Playwright-first (AUT-240, Layer A): read the live DOM text.
169
+ if (adapter.getTextContent) {
170
+ try {
171
+ const live = await adapter.getTextContent(selector);
172
+ if (live !== null && normalizeText(live).includes(expected)) {
173
+ return { passed: true, reason: `element "${selector}" contains "${expectedText}" (Playwright)` };
174
+ }
175
+ // Element found but text didn't match (or selector missed): fall through
176
+ // to the AKTree, which may surface label/value/aria text the raw
177
+ // textContent omits.
178
+ }
179
+ catch {
180
+ // Fall through to AKTree.
181
+ }
182
+ }
183
+ // Fallback: AKTree (label / value / own text).
150
184
  try {
151
185
  const tree = await adapter.getAKTree();
152
186
  const node = findNodeBySelector(tree, selector);
@@ -158,9 +192,8 @@ async function checkTextContains(adapter, selector, expectedText) {
158
192
  node.value || '',
159
193
  node.attributes.__ownText || '',
160
194
  ].join(' '));
161
- const expected = normalizeText(expectedText);
162
195
  if (nodeText.includes(expected)) {
163
- return { passed: true, reason: `element "${selector}" contains "${expectedText}"` };
196
+ return { passed: true, reason: `element "${selector}" contains "${expectedText}" (AKTree)` };
164
197
  }
165
198
  return { passed: false, reason: `element "${selector}" text "${nodeText}" does not contain "${expectedText}"` };
166
199
  }
@@ -168,24 +201,39 @@ async function checkTextContains(adapter, selector, expectedText) {
168
201
  return { passed: false, reason: `error checking text: ${err}` };
169
202
  }
170
203
  }
204
+ function evaluateOverlayTree(tree) {
205
+ if (tree.overlays.length === 0) {
206
+ return { passed: true, reason: 'no overlays detected' };
207
+ }
208
+ const blocking = tree.overlays.filter(o => o.blocksInteraction);
209
+ if (blocking.length === 0) {
210
+ return { passed: true, reason: 'overlays present but none blocking interaction' };
211
+ }
212
+ return { passed: false, reason: `${blocking.length} blocking overlay(s) still present` };
213
+ }
171
214
  async function checkOverlayDismissed(adapter) {
172
215
  try {
173
- const tree = await adapter.getAKTree();
174
- // Check if any overlays are reported in the tree
175
- if (tree.overlays.length === 0) {
176
- return { passed: true, reason: 'no overlays detected' };
177
- }
178
- // Check if remaining overlays are blocking
179
- const blocking = tree.overlays.filter(o => o.blocksInteraction);
180
- if (blocking.length === 0) {
181
- return { passed: true, reason: 'overlays present but none blocking interaction' };
182
- }
183
- return { passed: false, reason: `${blocking.length} blocking overlay(s) still present` };
216
+ return evaluateOverlayTree(await adapter.getAKTree());
184
217
  }
185
218
  catch {
186
- // If AKTree is unavailable (e.g. page.evaluate failure), assume overlays are dismissed.
187
- // The overlay dismissal itself ran; we just can't verify via AKTree.
188
- return { passed: true, reason: 'overlay check skipped (AKTree unavailable), assuming dismissed' };
219
+ // AUT-240 (decision 2): "assume OK, but smart". A first `page.evaluate`
220
+ // hiccup (e.g. navigation in flight) is no longer assumed-OK immediately:
221
+ // settle the page and retry the AKTree once.
222
+ try {
223
+ if (adapter.waitForVisuallyStable) {
224
+ await adapter.waitForVisuallyStable({ maxWaitMs: 2000 });
225
+ }
226
+ return evaluateOverlayTree(await adapter.getAKTree());
227
+ }
228
+ catch {
229
+ // Still unverifiable: assume dismissed as a last resort, but flag
230
+ // low-confidence so the post-capture verification scrutinizes it.
231
+ return {
232
+ passed: true,
233
+ reason: 'overlay check unverifiable after settle; assuming dismissed (low-confidence)',
234
+ lowConfidence: true,
235
+ };
236
+ }
189
237
  }
190
238
  }
191
239
  async function checkScreenshotStable(adapter, threshold, context) {
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Capture Agent — Program content hashing (run provenance)
3
+ *
4
+ * Stable content hash of an ExecutionProgram, persisted as `program_hash` on
5
+ * each run so a screenshot can be traced back to the exact program bytes that
6
+ * produced it. Isolated from program-migrations.ts to keep `node:crypto` out of
7
+ * the schema validation import chain.
8
+ */
9
+ import type { ExecutionProgram } from './execution-types.js';
10
+ /** sha256 of the canonicalized program (stable across runs of the same program). */
11
+ export declare function hashProgram(program: ExecutionProgram): string;
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Capture Agent — Program content hashing (run provenance)
3
+ *
4
+ * Stable content hash of an ExecutionProgram, persisted as `program_hash` on
5
+ * each run so a screenshot can be traced back to the exact program bytes that
6
+ * produced it. Isolated from program-migrations.ts to keep `node:crypto` out of
7
+ * the schema validation import chain.
8
+ */
9
+ import { createHash } from 'node:crypto';
10
+ /** Deterministic JSON serialization with object keys sorted recursively. */
11
+ function stableStringify(value) {
12
+ if (value === undefined)
13
+ return 'null';
14
+ if (value === null || typeof value !== 'object')
15
+ return JSON.stringify(value);
16
+ if (Array.isArray(value))
17
+ return `[${value.map(stableStringify).join(',')}]`;
18
+ const obj = value;
19
+ const entries = Object.keys(obj)
20
+ .sort()
21
+ .map((k) => `${JSON.stringify(k)}:${stableStringify(obj[k])}`);
22
+ return `{${entries.join(',')}}`;
23
+ }
24
+ /** sha256 of the canonicalized program (stable across runs of the same program). */
25
+ export function hashProgram(program) {
26
+ return createHash('sha256').update(stableStringify(program)).digest('hex');
27
+ }
28
+ //# sourceMappingURL=program-hash.js.map
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Capture Agent — Program FORM migrations (migrate-on-read)
3
+ *
4
+ * Old presets are stored at whatever `programSchemaVersion` (FORM) was current
5
+ * when they were authored. `upgradeProgram` runs a chain of pure
6
+ * `migrate_vN→vN+1` functions to bring any stored program up to the current
7
+ * form BEFORE strict schema validation, so the runner only ever sees one shape.
8
+ *
9
+ * Properties of this layer (decisions locked in AUT-242):
10
+ * - Compat forever: the chain is kept indefinitely; no support window.
11
+ * - Migrate-on-read only: programs are NEVER rewritten back to storage. The
12
+ * stored form changes only when the generator recompiles (create/modify).
13
+ * - Pure + idempotent: upgrading a program already at the current form is a no-op.
14
+ *
15
+ * This module is intentionally free of Node-only imports so it can be pulled
16
+ * into the schema validation chain on any runtime. Content hashing
17
+ * (`node:crypto`) lives in program-hash.ts.
18
+ */
19
+ /**
20
+ * Reads the FORM version a raw (pre-migration) program was stored at.
21
+ * Absent / non-finite ⇒ 0 (the oldest form). Used to stamp run provenance
22
+ * (`program_schema_version_origin`) before `upgradeProgram` bumps it.
23
+ */
24
+ export declare function readOriginSchemaVersion(raw: unknown): number;
25
+ /**
26
+ * Brings any stored program up to {@link CURRENT_PROGRAM_SCHEMA_VERSION} (form)
27
+ * before strict validation. Pure: clones, never mutates `raw`. Idempotent: a
28
+ * program already at the current form is returned with only its version stamped.
29
+ * Non-object input is returned untouched so the schema raises a clean error.
30
+ */
31
+ export declare function upgradeProgram(raw: unknown): unknown;