autokap 1.8.6 → 1.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/action-verifier.d.ts +6 -0
- package/dist/action-verifier.js +30 -17
- package/dist/browser.d.ts +59 -0
- package/dist/browser.js +259 -0
- package/dist/cli-config.js +7 -12
- package/dist/cli-contract.d.ts +5 -9
- package/dist/cli-contract.js +11 -38
- package/dist/cli-runner.d.ts +0 -1
- package/dist/cli-runner.js +74 -59
- package/dist/cli.js +7 -7
- package/dist/clip-capture-loop.d.ts +28 -7
- package/dist/clip-capture-loop.js +102 -19
- package/dist/engine-version.d.ts +24 -0
- package/dist/engine-version.js +25 -0
- package/dist/execution-schema.d.ts +22 -0
- package/dist/execution-schema.js +59 -8
- package/dist/execution-types.d.ts +116 -0
- package/dist/opcode-runner.d.ts +8 -1
- package/dist/opcode-runner.js +120 -29
- package/dist/postcondition.d.ts +18 -3
- package/dist/postcondition.js +75 -27
- package/dist/program-hash.d.ts +11 -0
- package/dist/program-hash.js +28 -0
- package/dist/program-migrations.d.ts +31 -0
- package/dist/program-migrations.js +93 -0
- package/dist/program-signing.d.ts +11 -0
- package/dist/program-signing.js +1 -0
- package/dist/recovery-chain.js +8 -11
- package/dist/scenario-cookie.d.ts +36 -0
- package/dist/scenario-cookie.js +62 -0
- package/dist/security.d.ts +21 -0
- package/dist/security.js +46 -8
- package/dist/server-credit-usage.d.ts +1 -1
- package/dist/version.d.ts +1 -0
- package/dist/version.js +1 -0
- package/dist/video-narration-schema.d.ts +3 -0
- package/dist/video-narration-schema.js +3 -0
- package/dist/wait-contract.d.ts +104 -0
- package/dist/wait-contract.js +144 -0
- package/dist/web-playwright-local.d.ts +9 -1
- package/dist/web-playwright-local.js +0 -0
- package/package.json +2 -2
- package/readme.md +9 -15
package/dist/opcode-runner.js
CHANGED
|
@@ -6,7 +6,8 @@
|
|
|
6
6
|
* delegates to recovery chain on failure, and respects circuit breaker.
|
|
7
7
|
*/
|
|
8
8
|
import { isSoftOpcodeKind } from './execution-types.js';
|
|
9
|
-
import { evaluatePostcondition } from './postcondition.js';
|
|
9
|
+
import { evaluatePostcondition, evaluatePostconditionWithProgress } from './postcondition.js';
|
|
10
|
+
import { WAIT_CONTRACT_VERSION, resolveGlobalWaitDeadlineMs, runWithProgressBudget, } from './wait-contract.js';
|
|
10
11
|
import { ActionVerifier } from './action-verifier.js';
|
|
11
12
|
import { CircuitBreaker } from './circuit-breaker.js';
|
|
12
13
|
import { smartWaitForStability } from './smart-wait.js';
|
|
@@ -98,6 +99,14 @@ function resolveRuntimePostcondition(opcode) {
|
|
|
98
99
|
}
|
|
99
100
|
return opcode.postcondition;
|
|
100
101
|
}
|
|
102
|
+
/** Mark the variant low-confidence once (keeps the first reason). */
|
|
103
|
+
function recordLowConfidence(state, reason) {
|
|
104
|
+
if (state.lowConfidence)
|
|
105
|
+
return;
|
|
106
|
+
state.lowConfidence = true;
|
|
107
|
+
state.lowConfidenceReason = reason;
|
|
108
|
+
logger.debug(`[run] low-confidence flagged: ${reason}`);
|
|
109
|
+
}
|
|
101
110
|
// ── Main execution function ─────────────────────────────────────────
|
|
102
111
|
export async function executeProgram(program, createAdapter, options = {}) {
|
|
103
112
|
const recoveryChain = options.recoveryChain ?? new NoOpRecoveryChain();
|
|
@@ -117,6 +126,7 @@ export async function executeProgram(program, createAdapter, options = {}) {
|
|
|
117
126
|
healerInvocations: 0,
|
|
118
127
|
circuitBreakerTrips: 0,
|
|
119
128
|
};
|
|
129
|
+
logger.debug(`[run] wait contract v${WAIT_CONTRACT_VERSION} (adaptive budgets + visual stability)`);
|
|
120
130
|
let nextVariantIndex = 0;
|
|
121
131
|
const workerCount = Math.min(maxParallelVariants, program.variants.length);
|
|
122
132
|
const workers = Array.from({ length: workerCount }, async () => {
|
|
@@ -148,6 +158,9 @@ export async function executeProgram(program, createAdapter, options = {}) {
|
|
|
148
158
|
const aborted = options.abortSignal?.aborted && completedVariantResults.length < program.variants.length;
|
|
149
159
|
const success = !aborted && completedVariantResults.length > 0 && completedVariantResults.every(v => v.success);
|
|
150
160
|
const detectedAppVersion = completedVariantResults.reduce((acc, variantResult) => acc ?? (variantResult.detectedAppVersion ?? null), null);
|
|
161
|
+
// AUT-241 — surface (don't mask) cuts: aggregate every recording warning from
|
|
162
|
+
// each variant's clip/video artifacts. Diagnostic only; never affects success.
|
|
163
|
+
const aggregatedWarnings = completedVariantResults.flatMap((v) => v.artifacts.flatMap((a) => a.warnings ?? []));
|
|
151
164
|
return {
|
|
152
165
|
programId: program.presetId,
|
|
153
166
|
success,
|
|
@@ -157,6 +170,7 @@ export async function executeProgram(program, createAdapter, options = {}) {
|
|
|
157
170
|
opcodeTimings,
|
|
158
171
|
totalDurationMs: Date.now() - startTime,
|
|
159
172
|
detectedAppVersion,
|
|
173
|
+
warnings: aggregatedWarnings.length ? aggregatedWarnings : undefined,
|
|
160
174
|
error: aborted ? 'aborted' : (success ? undefined : completedVariantResults.find(v => !v.success)?.error),
|
|
161
175
|
};
|
|
162
176
|
}
|
|
@@ -302,6 +316,13 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
|
|
|
302
316
|
const startTime = Date.now();
|
|
303
317
|
const effectiveTimeoutMs = resolveOpcodeTimeoutMs(opcode);
|
|
304
318
|
const deadlineMs = startTime + effectiveTimeoutMs;
|
|
319
|
+
// AUT-240 (Layer C): a hard, per-media global deadline for adaptive waits,
|
|
320
|
+
// separate from the compiled per-opcode timeout. The compiled timeout acts as
|
|
321
|
+
// a FLOOR (never a ceiling), so `WAIT_FOR` and postconditions can extend while
|
|
322
|
+
// the page is still progressing, up to this deadline. Interactions stay on the
|
|
323
|
+
// narrow `deadlineMs` (Playwright auto-waiting handles those).
|
|
324
|
+
const globalDeadlineMs = resolveGlobalWaitDeadlineMs(startTime, effectiveTimeoutMs, artifactPlan.mediaMode);
|
|
325
|
+
const getProgress = makeProgressGetter(adapter);
|
|
305
326
|
const actionEffectPolicy = getOpcodeActionEffectPolicy(opcode);
|
|
306
327
|
const isSoft = isSoftOpcodeKind(opcode.kind);
|
|
307
328
|
// Track page context for circuit breaker
|
|
@@ -318,21 +339,41 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
|
|
|
318
339
|
await verifier.captureBeforeState(adapter);
|
|
319
340
|
logger.debug(`[opcode ${index}] captureBeforeState took ${Date.now() - beforeStart}ms`);
|
|
320
341
|
}
|
|
321
|
-
|
|
342
|
+
// `WAIT_FOR` is a pure wait: it extends while the page is progressing, up to
|
|
343
|
+
// the global deadline. All other opcodes are interactions bounded by the
|
|
344
|
+
// narrow per-opcode deadline (Playwright auto-waiting covers them).
|
|
345
|
+
const isPureWait = opcode.kind === 'WAIT_FOR';
|
|
346
|
+
const actionDeadlineMs = isPureWait ? globalDeadlineMs : deadlineMs;
|
|
347
|
+
const actionBudgetMs = getRemainingTimeMs(actionDeadlineMs);
|
|
322
348
|
if (actionBudgetMs <= 0) {
|
|
323
349
|
const reason = `timeout after ${effectiveTimeoutMs}ms`;
|
|
324
|
-
logger.debug(`[opcode ${index}] no budget left after captureBeforeState (deadline=${
|
|
350
|
+
logger.debug(`[opcode ${index}] no budget left after captureBeforeState (deadline=${actionDeadlineMs}, now=${Date.now()})`);
|
|
325
351
|
if (isSoft)
|
|
326
352
|
return softSkipResult(opcode, index, startTime, reason, telemetry);
|
|
327
|
-
return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
|
|
353
|
+
return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
|
|
328
354
|
}
|
|
329
355
|
// For mediaMode='video', capture pre-action timing + bbox metadata inside
|
|
330
356
|
// the active clip window only. Opcodes outside a clip are not part of the
|
|
331
357
|
// video output.
|
|
332
358
|
const preTiming = await capturePreActionTiming(opcode, adapter, executionState.activeClip, artifactPlan);
|
|
333
|
-
logger.debug(`[opcode ${index}] action exec start — actionBudget ${actionBudgetMs}ms`);
|
|
359
|
+
logger.debug(`[opcode ${index}] action exec start — actionBudget ${actionBudgetMs}ms${isPureWait ? ' (adaptive)' : ''}`);
|
|
334
360
|
const actionStart = Date.now();
|
|
335
|
-
|
|
361
|
+
let result;
|
|
362
|
+
if (opcode.kind === 'WAIT_FOR' && getProgress) {
|
|
363
|
+
// Extend-on-progress: give the wait a generous budget (up to the global
|
|
364
|
+
// deadline) and let the watchdog cut it early only when the page is truly
|
|
365
|
+
// stuck. A slow-but-progressing page no longer trips a fixed timeout.
|
|
366
|
+
const waited = await runWithProgressBudget((budgetMs) => executeOpcodeAction({ ...opcode, timeoutMs: Math.max(1, Math.round(budgetMs)) }, index, adapter, artifacts, telemetry, currentVariant, executionState, artifactPlan, mockDataGroups, options, credentials), { startedAtMs: startTime, globalDeadlineMs, minBudgetMs: effectiveTimeoutMs, getProgress });
|
|
367
|
+
result = waited.result ?? {
|
|
368
|
+
success: false,
|
|
369
|
+
error: waited.cut === 'stuck'
|
|
370
|
+
? `WAIT_FOR cut: page stuck (no progress for ${Math.round(waited.waitedMs)}ms)`
|
|
371
|
+
: `WAIT_FOR cut: global deadline reached, page never settled`,
|
|
372
|
+
};
|
|
373
|
+
}
|
|
374
|
+
else {
|
|
375
|
+
result = await withTimeout(() => executeOpcodeAction(opcode, index, adapter, artifacts, telemetry, currentVariant, executionState, artifactPlan, mockDataGroups, options, credentials), actionBudgetMs);
|
|
376
|
+
}
|
|
336
377
|
logger.debug(`[opcode ${index}] action exec end — took ${Date.now() - actionStart}ms, success=${result.success}${result.error ? `, error=${result.error}` : ''}`);
|
|
337
378
|
if (preTiming) {
|
|
338
379
|
const keystrokeOffsetsMs = result.keystrokeTimestampsMs && result.keystrokeTimestampsMs.length > 0
|
|
@@ -358,39 +399,46 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
|
|
|
358
399
|
const reason = result.error ?? 'action failed';
|
|
359
400
|
if (isSoft)
|
|
360
401
|
return softSkipResult(opcode, index, startTime, reason, telemetry);
|
|
361
|
-
return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
|
|
402
|
+
return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
|
|
362
403
|
}
|
|
363
|
-
// Verify postcondition
|
|
364
|
-
|
|
404
|
+
// Verify postcondition — extend-on-progress up to the global deadline so a
|
|
405
|
+
// slow action no longer starves it (failure mode #3: clamped to ~1ms).
|
|
406
|
+
const postconditionBudgetMs = getRemainingTimeMs(globalDeadlineMs);
|
|
365
407
|
if (postconditionBudgetMs <= 0) {
|
|
366
408
|
const reason = `timeout after ${effectiveTimeoutMs}ms`;
|
|
367
409
|
logger.debug(`[opcode ${index}] no budget left for postcondition check`);
|
|
368
410
|
if (isSoft)
|
|
369
411
|
return softSkipResult(opcode, index, startTime, reason, telemetry);
|
|
370
|
-
return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
|
|
412
|
+
return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
|
|
371
413
|
}
|
|
372
414
|
const runtimePostcondition = resolveRuntimePostcondition(opcode);
|
|
373
415
|
const postStart = Date.now();
|
|
374
|
-
const postcondition = await
|
|
416
|
+
const postcondition = await evaluatePostconditionWithProgress(adapter, runtimePostcondition, postStart, globalDeadlineMs, getProgress);
|
|
375
417
|
logger.debug(`[opcode ${index}] postcondition (${runtimePostcondition.type}) took ${Date.now() - postStart}ms — passed=${postcondition.passed}, reason="${postcondition.reason}"`);
|
|
418
|
+
if (postcondition.lowConfidence) {
|
|
419
|
+
recordLowConfidence(executionState, `postcondition ${runtimePostcondition.type}: ${postcondition.reason}`);
|
|
420
|
+
}
|
|
376
421
|
if (!postcondition.passed) {
|
|
377
422
|
const reason = `postcondition failed: ${postcondition.reason}`;
|
|
378
423
|
if (isSoft)
|
|
379
424
|
return softSkipResult(opcode, index, startTime, reason, telemetry);
|
|
380
|
-
return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, reason);
|
|
425
|
+
return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
|
|
381
426
|
}
|
|
382
427
|
// Verify action effects through the shared policy. Weak `any_change`
|
|
383
428
|
// postconditions are only meaningful if this verifier observes a real
|
|
384
429
|
// URL/tree/state/scroll change.
|
|
385
430
|
if (actionEffectPolicy.captureBefore) {
|
|
386
431
|
const verification = await verifier.verifyAfterAction(adapter);
|
|
432
|
+
if (verification.lowConfidence) {
|
|
433
|
+
recordLowConfidence(executionState, `action effect unverified: ${verification.summary}`);
|
|
434
|
+
}
|
|
387
435
|
if (!verification.hadEffect && actionEffectPolicy.requireEffect) {
|
|
388
436
|
if (opcode.kind === 'PRESS_KEY' && actionEffectPolicy.noEffectMode === 'allow') {
|
|
389
437
|
logger.debug(`[opcode ${index}] PRESS_KEY had no DOM effect (${verification.summary}) — ` +
|
|
390
438
|
`postcondition passed, treating as redundant-but-successful`);
|
|
391
439
|
}
|
|
392
440
|
else {
|
|
393
|
-
return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, `action had no effect: ${verification.summary}`);
|
|
441
|
+
return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, `action had no effect: ${verification.summary}`);
|
|
394
442
|
}
|
|
395
443
|
}
|
|
396
444
|
}
|
|
@@ -424,7 +472,7 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
|
|
|
424
472
|
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
425
473
|
if (isSoft)
|
|
426
474
|
return softSkipResult(opcode, index, startTime, errorMsg, telemetry);
|
|
427
|
-
return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, errorMsg);
|
|
475
|
+
return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, errorMsg);
|
|
428
476
|
}
|
|
429
477
|
}
|
|
430
478
|
/** Post-action breathing room (ms) injected between visible interactions
|
|
@@ -450,8 +498,9 @@ function sleep(ms) {
|
|
|
450
498
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
451
499
|
}
|
|
452
500
|
// ── Failure handling with recovery ──────────────────────────────────
|
|
453
|
-
async function handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, effectiveTimeoutMs, errorMsg) {
|
|
501
|
+
async function handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, errorMsg) {
|
|
454
502
|
const actionEffectPolicy = getOpcodeActionEffectPolicy(opcode);
|
|
503
|
+
const getProgress = makeProgressGetter(adapter);
|
|
455
504
|
const breakerState = breaker.recordFailure(index, opcode.maxFailures);
|
|
456
505
|
if (breakerState.tripped) {
|
|
457
506
|
telemetry.circuitBreakerTrips++;
|
|
@@ -464,7 +513,11 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
|
|
|
464
513
|
error: `${errorMsg} (circuit breaker: ${breakerState.reason})`,
|
|
465
514
|
};
|
|
466
515
|
}
|
|
467
|
-
|
|
516
|
+
// AUT-240 (Phase 5): recovery budgets to the GLOBAL deadline, not the narrow
|
|
517
|
+
// compiled one. Otherwise an adaptive WAIT_FOR that consumed its compiled
|
|
518
|
+
// budget would leave zero for recovery (failure mode #1) — and selector-repair
|
|
519
|
+
// / healer strategies that could actually fix the failure would never run.
|
|
520
|
+
const remainingTimeMs = getRemainingTimeMs(globalDeadlineMs);
|
|
468
521
|
if (remainingTimeMs <= 0) {
|
|
469
522
|
return {
|
|
470
523
|
opcodeIndex: index,
|
|
@@ -490,6 +543,8 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
|
|
|
490
543
|
}
|
|
491
544
|
const recovery = await recoveryChain.attempt(opcode, index, adapter, {
|
|
492
545
|
remainingTimeMs,
|
|
546
|
+
globalDeadlineMs,
|
|
547
|
+
getProgress,
|
|
493
548
|
maxDeterministicRetries: Math.max(0, opcode.maxFailures - breakerState.opcodeFailures),
|
|
494
549
|
currentVariant,
|
|
495
550
|
allowPageReload: !executionState.activeClip,
|
|
@@ -511,7 +566,7 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
|
|
|
511
566
|
if (recovery.patch) {
|
|
512
567
|
healerPatches.push(recovery.patch);
|
|
513
568
|
}
|
|
514
|
-
const postconditionBudgetMs = getRemainingTimeMs(
|
|
569
|
+
const postconditionBudgetMs = getRemainingTimeMs(globalDeadlineMs);
|
|
515
570
|
if (postconditionBudgetMs <= 0) {
|
|
516
571
|
return {
|
|
517
572
|
opcodeIndex: index,
|
|
@@ -523,7 +578,10 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
|
|
|
523
578
|
};
|
|
524
579
|
}
|
|
525
580
|
const runtimePostcondition = resolveRuntimePostcondition(opcode);
|
|
526
|
-
const postcondition = await
|
|
581
|
+
const postcondition = await evaluatePostconditionWithProgress(adapter, runtimePostcondition, Date.now(), globalDeadlineMs, getProgress);
|
|
582
|
+
if (postcondition.lowConfidence) {
|
|
583
|
+
recordLowConfidence(executionState, `postcondition ${runtimePostcondition.type}: ${postcondition.reason}`);
|
|
584
|
+
}
|
|
527
585
|
if (!postcondition.passed) {
|
|
528
586
|
return {
|
|
529
587
|
opcodeIndex: index,
|
|
@@ -536,6 +594,9 @@ async function handleFailure(opcode, index, adapter, verifier, breaker, recovery
|
|
|
536
594
|
}
|
|
537
595
|
if (actionEffectPolicy.captureBefore) {
|
|
538
596
|
const verification = await verifier.verifyAfterAction(adapter);
|
|
597
|
+
if (verification.lowConfidence) {
|
|
598
|
+
recordLowConfidence(executionState, `action effect unverified: ${verification.summary}`);
|
|
599
|
+
}
|
|
539
600
|
if (!verification.hadEffect && actionEffectPolicy.requireEffect) {
|
|
540
601
|
return {
|
|
541
602
|
opcodeIndex: index,
|
|
@@ -611,13 +672,34 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
|
|
|
611
672
|
assertSurfacePostconditionSource(opcode);
|
|
612
673
|
return evaluateSurfaceAssertion(adapter, opcode.selectors, opcode.matchAll);
|
|
613
674
|
case 'CAPTURE_SCREENSHOT': {
|
|
614
|
-
|
|
675
|
+
// AUT-240 (Layer B): stabilize visually before capture but NEVER fail the
|
|
676
|
+
// capture on it. Prefer the adapter's adaptive stabilizer (fonts/images/
|
|
677
|
+
// semantic loaders/DOM-quiet + bounded pixel fallback); fall back to the
|
|
678
|
+
// legacy smart-wait for adapters that don't implement it. A page that
|
|
679
|
+
// never fully settles (e.g. a perpetual animation) is captured anyway.
|
|
680
|
+
const stabilize = (maxWaitMs) => adapter.waitForVisuallyStable
|
|
681
|
+
? adapter.waitForVisuallyStable({ maxWaitMs })
|
|
682
|
+
: smartWaitForStability(adapter, { maxWaitMs }).then((r) => ({
|
|
683
|
+
stable: r.stable,
|
|
684
|
+
reason: r.waitedFor.join(', ') || 'unknown',
|
|
685
|
+
}));
|
|
686
|
+
const stability = await stabilize(5000);
|
|
687
|
+
if (!stability.stable) {
|
|
688
|
+
logger.debug(`[opcode ${opcodeIndex}] capturing despite unstable page: ${stability.reason}`);
|
|
689
|
+
}
|
|
690
|
+
// AUT-240 (Layer 4): flag the capture low-confidence if a faux-vert was
|
|
691
|
+
// assumed-OK earlier in this variant, or if the page never reached a
|
|
692
|
+
// visually-stable state before this shot. "Assume OK, but flag it." Keep
|
|
693
|
+
// both reasons when both apply (don't let one mask the other).
|
|
694
|
+
const captureLowConfidence = Boolean(executionState.lowConfidence) || !stability.stable;
|
|
695
|
+
const lowConfidenceReasons = [];
|
|
696
|
+
if (executionState.lowConfidence && executionState.lowConfidenceReason) {
|
|
697
|
+
lowConfidenceReasons.push(executionState.lowConfidenceReason);
|
|
698
|
+
}
|
|
615
699
|
if (!stability.stable) {
|
|
616
|
-
|
|
617
|
-
success: false,
|
|
618
|
-
error: `page not stable before screenshot; unresolved loaders: ${stability.waitedFor.join(', ') || 'unknown'}`,
|
|
619
|
-
};
|
|
700
|
+
lowConfidenceReasons.push(`captured before visual stability: ${stability.reason}`);
|
|
620
701
|
}
|
|
702
|
+
const captureLowConfidenceReason = lowConfidenceReasons.join('; ') || undefined;
|
|
621
703
|
const captureUrl = await adapter.getCurrentUrl();
|
|
622
704
|
const takeBuffer = async () => {
|
|
623
705
|
if (opcode.elementSelector && adapter.takeElementScreenshot) {
|
|
@@ -648,7 +730,7 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
|
|
|
648
730
|
});
|
|
649
731
|
}
|
|
650
732
|
if (!verification.passed) {
|
|
651
|
-
await
|
|
733
|
+
await stabilize(8000);
|
|
652
734
|
const retryBuffer = await takeBuffer();
|
|
653
735
|
const retryVerification = await verifyCaptureQuality(retryBuffer, {
|
|
654
736
|
expectedDescription: opcode.description,
|
|
@@ -733,6 +815,8 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
|
|
|
733
815
|
variantId: currentVariant?.id,
|
|
734
816
|
tabIconData,
|
|
735
817
|
tabIconMimeType,
|
|
818
|
+
lowConfidence: captureLowConfidence || undefined,
|
|
819
|
+
lowConfidenceReason: captureLowConfidence ? captureLowConfidenceReason : undefined,
|
|
736
820
|
});
|
|
737
821
|
break;
|
|
738
822
|
}
|
|
@@ -781,6 +865,8 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
|
|
|
781
865
|
stepDescription: opcode.description,
|
|
782
866
|
stepIndex: opcodeIndex,
|
|
783
867
|
variantId: currentVariant?.id,
|
|
868
|
+
// AUT-241 — full-load / unexpected-nav warnings seen during this take.
|
|
869
|
+
warnings: recording.warnings,
|
|
784
870
|
});
|
|
785
871
|
break;
|
|
786
872
|
}
|
|
@@ -888,11 +974,16 @@ function resolveClipIdentity(activeClip, opcode) {
|
|
|
888
974
|
clipName: opcode.clipName ?? activeClip?.clipName ?? opcode.description,
|
|
889
975
|
};
|
|
890
976
|
}
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
977
|
+
/**
|
|
978
|
+
* Bind the adapter's optional progress probe for the watchdog, or return
|
|
979
|
+
* undefined when the adapter has none (graceful degradation: waits then fall
|
|
980
|
+
* back to fixed compiled budgets). See `runWithProgressBudget`.
|
|
981
|
+
*/
|
|
982
|
+
function makeProgressGetter(adapter) {
|
|
983
|
+
if (!adapter.getProgressSnapshot)
|
|
984
|
+
return undefined;
|
|
985
|
+
const getSnapshot = adapter.getProgressSnapshot.bind(adapter);
|
|
986
|
+
return () => getSnapshot();
|
|
896
987
|
}
|
|
897
988
|
function evaluateImmediateAssertion(result, prefix) {
|
|
898
989
|
return result.passed
|
package/dist/postcondition.d.ts
CHANGED
|
@@ -4,13 +4,28 @@
|
|
|
4
4
|
* Deterministic evaluation of postconditions after each opcode.
|
|
5
5
|
* No LLM calls — purely structural checks against AKTree, URL, and screenshots.
|
|
6
6
|
*/
|
|
7
|
-
import type { RuntimeAdapter, PostconditionSpec } from './execution-types.js';
|
|
7
|
+
import type { RuntimeAdapter, PostconditionSpec, ProgressSnapshot } from './execution-types.js';
|
|
8
8
|
/**
|
|
9
9
|
* Evaluates whether a postcondition holds.
|
|
10
10
|
* Retries internally up to postcondition.waitMs (polling).
|
|
11
11
|
* Returns true if the condition is satisfied, false otherwise.
|
|
12
12
|
*/
|
|
13
|
-
export
|
|
13
|
+
export interface PostconditionResult {
|
|
14
14
|
passed: boolean;
|
|
15
15
|
reason: string;
|
|
16
|
-
|
|
16
|
+
/**
|
|
17
|
+
* AUT-240 (decision 2): the check could not be verified deterministically
|
|
18
|
+
* (an AKTree probe kept throwing) and was assumed-OK as a last resort. The
|
|
19
|
+
* capture is flagged low-confidence rather than failed.
|
|
20
|
+
*/
|
|
21
|
+
lowConfidence?: boolean;
|
|
22
|
+
}
|
|
23
|
+
export declare function evaluatePostcondition(adapter: RuntimeAdapter, spec: PostconditionSpec): Promise<PostconditionResult>;
|
|
24
|
+
/**
|
|
25
|
+
* Evaluate a postcondition with extend-on-progress (AUT-240, Layer C): the poll
|
|
26
|
+
* gets a generous budget up to the global deadline and the progress watchdog
|
|
27
|
+
* cuts it only when the page is genuinely stuck. Replaces the old clamp-to-
|
|
28
|
+
* remaining-budget that could starve the check to ~1ms after a slow action.
|
|
29
|
+
* Shared by the runner (main path) and the recovery chain (retry re-check).
|
|
30
|
+
*/
|
|
31
|
+
export declare function evaluatePostconditionWithProgress(adapter: RuntimeAdapter, spec: PostconditionSpec, startedAtMs: number, globalDeadlineMs: number, getProgress: (() => Promise<ProgressSnapshot | null>) | undefined): Promise<PostconditionResult>;
|
package/dist/postcondition.js
CHANGED
|
@@ -4,12 +4,7 @@
|
|
|
4
4
|
* Deterministic evaluation of postconditions after each opcode.
|
|
5
5
|
* No LLM calls — purely structural checks against AKTree, URL, and screenshots.
|
|
6
6
|
*/
|
|
7
|
-
import {
|
|
8
|
-
/**
|
|
9
|
-
* Evaluates whether a postcondition holds.
|
|
10
|
-
* Retries internally up to postcondition.waitMs (polling).
|
|
11
|
-
* Returns true if the condition is satisfied, false otherwise.
|
|
12
|
-
*/
|
|
7
|
+
import { runWithProgressBudget } from './wait-contract.js';
|
|
13
8
|
export async function evaluatePostcondition(adapter, spec) {
|
|
14
9
|
const maxWait = spec.waitMs ?? 5000;
|
|
15
10
|
const pollInterval = 500;
|
|
@@ -31,6 +26,29 @@ export async function evaluatePostcondition(adapter, spec) {
|
|
|
31
26
|
// Final check after timeout
|
|
32
27
|
return checkOnce(adapter, spec, context);
|
|
33
28
|
}
|
|
29
|
+
/**
|
|
30
|
+
* Evaluate a postcondition with extend-on-progress (AUT-240, Layer C): the poll
|
|
31
|
+
* gets a generous budget up to the global deadline and the progress watchdog
|
|
32
|
+
* cuts it only when the page is genuinely stuck. Replaces the old clamp-to-
|
|
33
|
+
* remaining-budget that could starve the check to ~1ms after a slow action.
|
|
34
|
+
* Shared by the runner (main path) and the recovery chain (retry re-check).
|
|
35
|
+
*/
|
|
36
|
+
export async function evaluatePostconditionWithProgress(adapter, spec, startedAtMs, globalDeadlineMs, getProgress) {
|
|
37
|
+
// Immediate specs need no adaptive budget.
|
|
38
|
+
if (spec.type === 'always') {
|
|
39
|
+
return evaluatePostcondition(adapter, spec);
|
|
40
|
+
}
|
|
41
|
+
const compiledWaitMs = spec.waitMs ?? 5000;
|
|
42
|
+
const waited = await runWithProgressBudget((budgetMs) => evaluatePostcondition(adapter, { ...spec, waitMs: Math.max(1, Math.round(budgetMs)) }), { startedAtMs, globalDeadlineMs, minBudgetMs: compiledWaitMs, getProgress });
|
|
43
|
+
if (waited.result)
|
|
44
|
+
return waited.result;
|
|
45
|
+
return {
|
|
46
|
+
passed: false,
|
|
47
|
+
reason: waited.cut === 'stuck'
|
|
48
|
+
? `not met (page stuck, no progress for ${Math.round(waited.waitedMs)}ms)`
|
|
49
|
+
: 'not met (global wait deadline reached)',
|
|
50
|
+
};
|
|
51
|
+
}
|
|
34
52
|
async function checkOnce(adapter, spec, context) {
|
|
35
53
|
switch (spec.type) {
|
|
36
54
|
case 'route_matches':
|
|
@@ -117,16 +135,15 @@ async function checkElementVisible(adapter, selector) {
|
|
|
117
135
|
catch {
|
|
118
136
|
// Fall through to AKTree check
|
|
119
137
|
}
|
|
120
|
-
// Fallback:
|
|
138
|
+
// Fallback: a visible node matching the selector in the AKTree.
|
|
139
|
+
// (AUT-240, Layer A: the old `serializeAKTree().includes(selector)` fallback
|
|
140
|
+
// was dropped — a substring match on the serialized tree produced false
|
|
141
|
+
// positives.)
|
|
121
142
|
try {
|
|
122
143
|
const tree = await adapter.getAKTree();
|
|
123
144
|
if (hasVisibleNodeWithSelector(tree, selector)) {
|
|
124
145
|
return { passed: true, reason: `element "${selector}" is visible in AKTree` };
|
|
125
146
|
}
|
|
126
|
-
const serialized = serializeAKTree(tree);
|
|
127
|
-
if (serialized.includes(selector.replace(/[[\]"]/g, ''))) {
|
|
128
|
-
return { passed: true, reason: `element pattern "${selector}" found in serialized AKTree` };
|
|
129
|
-
}
|
|
130
147
|
return { passed: false, reason: `element "${selector}" not visible` };
|
|
131
148
|
}
|
|
132
149
|
catch {
|
|
@@ -147,6 +164,23 @@ async function checkElementAbsent(adapter, selector) {
|
|
|
147
164
|
}
|
|
148
165
|
}
|
|
149
166
|
async function checkTextContains(adapter, selector, expectedText) {
|
|
167
|
+
const expected = normalizeText(expectedText);
|
|
168
|
+
// Playwright-first (AUT-240, Layer A): read the live DOM text.
|
|
169
|
+
if (adapter.getTextContent) {
|
|
170
|
+
try {
|
|
171
|
+
const live = await adapter.getTextContent(selector);
|
|
172
|
+
if (live !== null && normalizeText(live).includes(expected)) {
|
|
173
|
+
return { passed: true, reason: `element "${selector}" contains "${expectedText}" (Playwright)` };
|
|
174
|
+
}
|
|
175
|
+
// Element found but text didn't match (or selector missed): fall through
|
|
176
|
+
// to the AKTree, which may surface label/value/aria text the raw
|
|
177
|
+
// textContent omits.
|
|
178
|
+
}
|
|
179
|
+
catch {
|
|
180
|
+
// Fall through to AKTree.
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
// Fallback: AKTree (label / value / own text).
|
|
150
184
|
try {
|
|
151
185
|
const tree = await adapter.getAKTree();
|
|
152
186
|
const node = findNodeBySelector(tree, selector);
|
|
@@ -158,9 +192,8 @@ async function checkTextContains(adapter, selector, expectedText) {
|
|
|
158
192
|
node.value || '',
|
|
159
193
|
node.attributes.__ownText || '',
|
|
160
194
|
].join(' '));
|
|
161
|
-
const expected = normalizeText(expectedText);
|
|
162
195
|
if (nodeText.includes(expected)) {
|
|
163
|
-
return { passed: true, reason: `element "${selector}" contains "${expectedText}"` };
|
|
196
|
+
return { passed: true, reason: `element "${selector}" contains "${expectedText}" (AKTree)` };
|
|
164
197
|
}
|
|
165
198
|
return { passed: false, reason: `element "${selector}" text "${nodeText}" does not contain "${expectedText}"` };
|
|
166
199
|
}
|
|
@@ -168,24 +201,39 @@ async function checkTextContains(adapter, selector, expectedText) {
|
|
|
168
201
|
return { passed: false, reason: `error checking text: ${err}` };
|
|
169
202
|
}
|
|
170
203
|
}
|
|
204
|
+
/**
 * Converts the overlay list of an AKTree snapshot into a postcondition verdict.
 *
 * Passes when the tree reports no overlays at all, or when none of the
 * reported overlays has `blocksInteraction` set. Fails otherwise, and the
 * reason states how many blocking overlays remain.
 *
 * @param tree AKTree snapshot; its `overlays` array is the only field read.
 * @returns `{ passed, reason }` describing the overlay state.
 */
function evaluateOverlayTree(tree) {
    const { overlays } = tree;
    if (overlays.length === 0) {
        return { passed: true, reason: 'no overlays detected' };
    }
    // Only overlays that intercept interaction count against the check.
    const blockingCount = overlays.filter((overlay) => overlay.blocksInteraction).length;
    return blockingCount === 0
        ? { passed: true, reason: 'overlays present but none blocking interaction' }
        : { passed: false, reason: `${blockingCount} blocking overlay(s) still present` };
}
|
|
171
214
|
/**
 * Postcondition check: passes when no interaction-blocking overlay remains.
 *
 * Reads the AKTree and evaluates its overlays. If that first attempt throws
 * (e.g. a navigation is in flight), the page is given up to 2s to settle and
 * the tree is read exactly once more. Only when the second attempt also throws
 * does the check fall back to a low-confidence pass.
 *
 * @param adapter Browser adapter. `getAKTree()` is called directly;
 *   `waitForVisuallyStable()` is used only when the adapter provides it.
 * @returns `{ passed, reason }`, plus `lowConfidence: true` when the overlay
 *   state could not be verified on either attempt.
 */
async function checkOverlayDismissed(adapter) {
    try {
        return evaluateOverlayTree(await adapter.getAKTree());
    }
    catch {
        // AUT-240 (decision 2): "assume OK, but smart". A first `page.evaluate`
        // hiccup (e.g. navigation in flight) is no longer assumed-OK immediately —
        // settle the page and retry the AKTree once (below).
    }
    if (adapter.waitForVisuallyStable) {
        try {
            await adapter.waitForVisuallyStable({ maxWaitMs: 2000 });
        }
        catch {
            // Settling is best-effort. A failed settle must not cancel the retry:
            // a successful re-read gives a verified answer, which is always
            // preferable to assuming the overlay is gone.
        }
    }
    try {
        return evaluateOverlayTree(await adapter.getAKTree());
    }
    catch {
        // Still unverifiable: assume dismissed as a last resort, but flag
        // low-confidence so the post-capture verification scrutinizes it.
        return {
            passed: true,
            reason: 'overlay check unverifiable after settle; assuming dismissed (low-confidence)',
            lowConfidence: true,
        };
    }
}
|
|
191
239
|
async function checkScreenshotStable(adapter, threshold, context) {
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Capture Agent — Program content hashing (run provenance)
|
|
3
|
+
*
|
|
4
|
+
* Stable content hash of an ExecutionProgram, persisted as `program_hash` on
|
|
5
|
+
* each run so a screenshot can be traced back to the exact program bytes that
|
|
6
|
+
* produced it. Isolated from program-migrations.ts to keep `node:crypto` out of
|
|
7
|
+
* the schema validation import chain.
|
|
8
|
+
*/
|
|
9
|
+
import type { ExecutionProgram } from './execution-types.js';
|
|
10
|
+
/** sha256 of the canonicalized program (stable across runs of the same program). */
|
|
11
|
+
export declare function hashProgram(program: ExecutionProgram): string;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Capture Agent — Program content hashing (run provenance)
|
|
3
|
+
*
|
|
4
|
+
* Stable content hash of an ExecutionProgram, persisted as `program_hash` on
|
|
5
|
+
* each run so a screenshot can be traced back to the exact program bytes that
|
|
6
|
+
* produced it. Isolated from program-migrations.ts to keep `node:crypto` out of
|
|
7
|
+
* the schema validation import chain.
|
|
8
|
+
*/
|
|
9
|
+
import { createHash } from 'node:crypto';
|
|
10
|
+
/**
 * True for values that JSON cannot represent: `undefined`, functions and
 * symbols. `JSON.stringify` omits such object properties and writes `null`
 * for such array elements.
 */
function isOmittedByJson(value) {
    return value === undefined || typeof value === 'function' || typeof value === 'symbol';
}
/**
 * Deterministic JSON serialization with object keys sorted recursively.
 *
 * The output follows `JSON.stringify` semantics for unrepresentable values, so
 * a value and its JSON round-trip (`JSON.parse(JSON.stringify(value))`)
 * serialize to the same string:
 *   - object properties holding `undefined` / a function / a symbol are omitted;
 *   - array elements holding such values, and array holes, become `null`;
 *   - a top-level unrepresentable value serializes as `null`.
 *
 * Output for plain JSON data (no such values) is unchanged by these rules.
 *
 * @param value Any JSON-like value.
 * @returns Canonical JSON text for `value`.
 */
function stableStringify(value) {
    if (isOmittedByJson(value))
        return 'null';
    if (value === null || typeof value !== 'object')
        return JSON.stringify(value);
    if (Array.isArray(value)) {
        // Array.from visits holes as `undefined` (unlike `map`, which skips them),
        // so sparse arrays cannot yield invalid output such as `[1,,3]`.
        const items = Array.from(value, (item) => stableStringify(item));
        return `[${items.join(',')}]`;
    }
    const entries = Object.keys(value)
        .filter((key) => !isOmittedByJson(value[key]))
        .sort()
        .map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`);
    return `{${entries.join(',')}}`;
}
|
|
24
|
+
/**
 * Computes the content hash of a program.
 *
 * The program is first serialized with `stableStringify`; the sha256 digest
 * of that text is returned hex-encoded.
 *
 * @param program The program to hash.
 * @returns Hex-encoded sha256 digest of the canonicalized program.
 */
export function hashProgram(program) {
    const canonicalText = stableStringify(program);
    const hasher = createHash('sha256');
    hasher.update(canonicalText);
    return hasher.digest('hex');
}
|
|
28
|
+
//# sourceMappingURL=program-hash.js.map
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Capture Agent — Program FORM migrations (migrate-on-read)
|
|
3
|
+
*
|
|
4
|
+
* Old presets are stored at whatever `programSchemaVersion` (FORM) was current
|
|
5
|
+
* when they were authored. `upgradeProgram` runs a chain of pure
|
|
6
|
+
* `migrate_vN→vN+1` functions to bring any stored program up to the current
|
|
7
|
+
* form BEFORE strict schema validation, so the runner only ever sees one shape.
|
|
8
|
+
*
|
|
9
|
+
* Properties of this layer (decisions locked in AUT-242):
|
|
10
|
+
* - Compat forever: the chain is kept indefinitely; no support window.
|
|
11
|
+
* - Migrate-on-read only: programs are NEVER rewritten back to storage. The
|
|
12
|
+
* stored form changes only when the generator recompiles (create/modify).
|
|
13
|
+
* - Pure + idempotent: a program already at the current form is a no-op.
|
|
14
|
+
*
|
|
15
|
+
* This module is intentionally free of Node-only imports so it can be pulled
|
|
16
|
+
* into the schema validation chain on any runtime. Content hashing
|
|
17
|
+
* (`node:crypto`) lives in program-hash.ts.
|
|
18
|
+
*/
|
|
19
|
+
/**
|
|
20
|
+
* Reads the FORM version a raw (pre-migration) program was stored at.
|
|
21
|
+
* Absent / non-finite ⇒ 0 (the oldest form). Used to stamp run provenance
|
|
22
|
+
* (`program_schema_version_origin`) before `upgradeProgram` bumps it.
|
|
23
|
+
*/
|
|
24
|
+
export declare function readOriginSchemaVersion(raw: unknown): number;
|
|
25
|
+
/**
|
|
26
|
+
* Brings any stored program up to {@link CURRENT_PROGRAM_SCHEMA_VERSION} (form)
|
|
27
|
+
* before strict validation. Pure: clones, never mutates `raw`. Idempotent: a
|
|
28
|
+
* program already at the current form is returned with only its version stamped.
|
|
29
|
+
* Non-object input is returned untouched so the schema raises a clean error.
|
|
30
|
+
*/
|
|
31
|
+
export declare function upgradeProgram(raw: unknown): unknown;
|