autokap 1.9.3 → 1.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/opcode-actions.d.ts +10 -0
- package/dist/opcode-runner.js +161 -91
- package/package.json +1 -1
package/dist/opcode-actions.d.ts
CHANGED
|
@@ -54,5 +54,15 @@ export interface OpcodeActionResult {
|
|
|
54
54
|
* click instead of when the cursor was still travelling.
|
|
55
55
|
*/
|
|
56
56
|
clickTimestampsMs?: number[];
|
|
57
|
+
/**
|
|
58
|
+
* For CAPTURE_SCREENSHOT: index in the variant's `artifacts` array of the
|
|
59
|
+
* screenshot this action just delivered. The deterministic capture pushes the
|
|
60
|
+
* artifact immediately; LLM enrichment (quality verification + alt text) runs
|
|
61
|
+
* afterwards in `executeOpcode` and mutates this artifact in place, so it can
|
|
62
|
+
* never void an already-captured screenshot or trip the action timeout.
|
|
63
|
+
*/
|
|
64
|
+
captureArtifactIndex?: number;
|
|
65
|
+
/** For CAPTURE_SCREENSHOT: page URL at capture time, reused by enrichment. */
|
|
66
|
+
captureUrl?: string;
|
|
57
67
|
}
|
|
58
68
|
export declare function executeOpcodeCoreAction(opcode: ExecutionOpcode, adapter: RuntimeAdapter, context?: OpcodeActionContext): Promise<OpcodeActionResult>;
|
package/dist/opcode-runner.js
CHANGED
|
@@ -55,6 +55,15 @@ export class NoOpRecoveryChain {
|
|
|
55
55
|
}
|
|
56
56
|
const MIN_CLIP_FINALIZATION_TIMEOUT_MS = 30000;
|
|
57
57
|
const DEFAULT_VIDEO_RECORDING_RESOLUTION = { width: 1920, height: 1080 };
|
|
58
|
+
/**
|
|
59
|
+
* The compiled per-opcode action budget. For CAPTURE_SCREENSHOT this governs
|
|
60
|
+
* ONLY the deterministic capture (visual stabilize + screenshot + favicon/title
|
|
61
|
+
* + artifact push); LLM enrichment (quality verification + alt text) runs AFTER
|
|
62
|
+
* the action under the global wait deadline (see `enrichCaptureArtifact`) and
|
|
63
|
+
* must never be folded back under this timeout — that is the regression this
|
|
64
|
+
* separation prevents. END_CLIP finalization gets a floor since muxing a
|
|
65
|
+
* recording is inherently slow.
|
|
66
|
+
*/
|
|
58
67
|
function resolveOpcodeTimeoutMs(opcode) {
|
|
59
68
|
if (opcode.kind === 'END_CLIP') {
|
|
60
69
|
return Math.max(opcode.timeoutMs, MIN_CLIP_FINALIZATION_TIMEOUT_MS);
|
|
@@ -423,10 +432,18 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
|
|
|
423
432
|
logger.debug(`[opcode ${index}] captureBeforeState took ${Date.now() - beforeStart}ms`);
|
|
424
433
|
}
|
|
425
434
|
// `WAIT_FOR` is a pure wait: it extends while the page is progressing, up to
|
|
426
|
-
// the global deadline.
|
|
427
|
-
//
|
|
435
|
+
// the global deadline. Artifact-producing capture opcodes (CAPTURE_SCREENSHOT,
|
|
436
|
+
// END_CLIP) also run against the global deadline, NOT the narrow compiled
|
|
437
|
+
// timeout: on a heavy, perpetually-animating page the deterministic capture
|
|
438
|
+
// (visual stabilize + screenshot / clip finalization) can exceed the 10s
|
|
439
|
+
// compiled budget on its own — especially under parallel CPU contention on the
|
|
440
|
+
// runner — even with LLM enrichment moved off this timed path. The compiled
|
|
441
|
+
// `timeoutMs` stays a floor (it never shortens the global deadline). All other
|
|
442
|
+
// opcodes are interactions bounded by the narrow per-opcode deadline
|
|
443
|
+
// (Playwright auto-waiting covers them).
|
|
428
444
|
const isPureWait = opcode.kind === 'WAIT_FOR';
|
|
429
|
-
const actionDeadlineMs = isPureWait ? globalDeadlineMs : deadlineMs;
|
|
445
|
+
const usesGlobalDeadline = isPureWait || isArtifactProducingOpcode(opcode.kind);
|
|
446
|
+
const actionDeadlineMs = usesGlobalDeadline ? globalDeadlineMs : deadlineMs;
|
|
430
447
|
const actionBudgetMs = getRemainingTimeMs(actionDeadlineMs);
|
|
431
448
|
if (actionBudgetMs <= 0) {
|
|
432
449
|
const reason = `timeout after ${effectiveTimeoutMs}ms`;
|
|
@@ -543,6 +560,18 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
|
|
|
543
560
|
await sleep(VIDEO_POST_ACTION_SETTLE_MS);
|
|
544
561
|
}
|
|
545
562
|
breaker.recordSuccess(index);
|
|
563
|
+
// Enrich the just-captured screenshot OFF the timed action path: quality
|
|
564
|
+
// verification + alt text run here, under the global wait deadline, and
|
|
565
|
+
// mutate the artifact in place — so slow LLM calls can never void a
|
|
566
|
+
// delivered screenshot (the failure this fixes). Recaptured screenshots from
|
|
567
|
+
// the recovery path are intentionally left un-enriched (recovery stays
|
|
568
|
+
// deterministic and fast).
|
|
569
|
+
if (opcode.kind === 'CAPTURE_SCREENSHOT'
|
|
570
|
+
&& options.llmConfig
|
|
571
|
+
&& result.captureArtifactIndex !== undefined
|
|
572
|
+
&& result.captureUrl !== undefined) {
|
|
573
|
+
await enrichCaptureArtifact(artifacts[result.captureArtifactIndex], opcode, adapter, result.captureUrl, currentVariant, options, telemetry, globalDeadlineMs);
|
|
574
|
+
}
|
|
546
575
|
return {
|
|
547
576
|
opcodeIndex: index,
|
|
548
577
|
kind: opcode.kind,
|
|
@@ -760,13 +789,7 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
|
|
|
760
789
|
// semantic loaders/DOM-quiet + bounded pixel fallback); fall back to the
|
|
761
790
|
// legacy smart-wait for adapters that don't implement it. A page that
|
|
762
791
|
// never fully settles (e.g. a perpetual animation) is captured anyway.
|
|
763
|
-
const stabilize = (maxWaitMs) => adapter.waitForVisuallyStable
|
|
764
|
-
? adapter.waitForVisuallyStable({ maxWaitMs })
|
|
765
|
-
: smartWaitForStability(adapter, { maxWaitMs }).then((r) => ({
|
|
766
|
-
stable: r.stable,
|
|
767
|
-
reason: r.waitedFor.join(', ') || 'unknown',
|
|
768
|
-
}));
|
|
769
|
-
const stability = await stabilize(5000);
|
|
792
|
+
const stability = await stabilizeForCapture(adapter, 5000);
|
|
770
793
|
if (!stability.stable) {
|
|
771
794
|
logger.debug(`[opcode ${opcodeIndex}] capturing despite unstable page: ${stability.reason}`);
|
|
772
795
|
}
|
|
@@ -784,85 +807,7 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
|
|
|
784
807
|
}
|
|
785
808
|
const captureLowConfidenceReason = lowConfidenceReasons.join('; ') || undefined;
|
|
786
809
|
const captureUrl = await adapter.getCurrentUrl();
|
|
787
|
-
const takeBuffer = () => {
|
|
788
|
-
if (opcode.elementSelector && adapter.takeElementScreenshot) {
|
|
789
|
-
return adapter.takeElementScreenshot(opcode.elementSelector, opcode.outscale);
|
|
790
|
-
}
|
|
791
|
-
if (opcode.elementSelector) {
|
|
792
|
-
throw new Error(`element capture requires adapter support for selector "${opcode.elementSelector}"`);
|
|
793
|
-
}
|
|
794
|
-
return adapter.takeScreenshot();
|
|
795
|
-
};
|
|
796
|
-
let buffer = await takeBuffer();
|
|
797
|
-
if (runOptions?.llmConfig) {
|
|
798
|
-
const verification = await verifyCaptureQuality(buffer, {
|
|
799
|
-
expectedDescription: opcode.description,
|
|
800
|
-
url: captureUrl,
|
|
801
|
-
locale: currentVariant?.locale,
|
|
802
|
-
theme: currentVariant?.theme,
|
|
803
|
-
}, runOptions.llmConfig);
|
|
804
|
-
if (verification.llmResult) {
|
|
805
|
-
telemetry.llmCallCount++;
|
|
806
|
-
telemetry.llmCostEur += verification.llmResult.costEur;
|
|
807
|
-
telemetry.llmStepUsages.push({
|
|
808
|
-
stepType: 'capture_verification',
|
|
809
|
-
generationId: verification.llmResult.generationId,
|
|
810
|
-
model: verification.llmResult.model,
|
|
811
|
-
promptTokens: verification.llmResult.promptTokens,
|
|
812
|
-
completionTokens: verification.llmResult.completionTokens,
|
|
813
|
-
});
|
|
814
|
-
}
|
|
815
|
-
if (!verification.passed) {
|
|
816
|
-
await stabilize(8000);
|
|
817
|
-
const retryBuffer = await takeBuffer();
|
|
818
|
-
const retryVerification = await verifyCaptureQuality(retryBuffer, {
|
|
819
|
-
expectedDescription: opcode.description,
|
|
820
|
-
url: captureUrl,
|
|
821
|
-
locale: currentVariant?.locale,
|
|
822
|
-
theme: currentVariant?.theme,
|
|
823
|
-
}, runOptions.llmConfig);
|
|
824
|
-
if (retryVerification.llmResult) {
|
|
825
|
-
telemetry.llmCallCount++;
|
|
826
|
-
telemetry.llmCostEur += retryVerification.llmResult.costEur;
|
|
827
|
-
telemetry.llmStepUsages.push({
|
|
828
|
-
stepType: 'capture_verification',
|
|
829
|
-
generationId: retryVerification.llmResult.generationId,
|
|
830
|
-
model: retryVerification.llmResult.model,
|
|
831
|
-
promptTokens: retryVerification.llmResult.promptTokens,
|
|
832
|
-
completionTokens: retryVerification.llmResult.completionTokens,
|
|
833
|
-
});
|
|
834
|
-
}
|
|
835
|
-
if (retryVerification.passed) {
|
|
836
|
-
buffer = retryBuffer;
|
|
837
|
-
}
|
|
838
|
-
}
|
|
839
|
-
}
|
|
840
|
-
let altText;
|
|
841
|
-
if (runOptions?.llmConfig) {
|
|
842
|
-
try {
|
|
843
|
-
const altResult = await generateAltText(buffer, {
|
|
844
|
-
description: opcode.description,
|
|
845
|
-
url: captureUrl,
|
|
846
|
-
locale: currentVariant?.locale,
|
|
847
|
-
presetName: runOptions.presetName,
|
|
848
|
-
}, runOptions.llmConfig);
|
|
849
|
-
altText = altResult.altText;
|
|
850
|
-
if (altResult.llmResult) {
|
|
851
|
-
telemetry.llmCallCount++;
|
|
852
|
-
telemetry.llmCostEur += altResult.llmResult.costEur;
|
|
853
|
-
telemetry.llmStepUsages.push({
|
|
854
|
-
stepType: 'alt_text_generation',
|
|
855
|
-
generationId: altResult.llmResult.generationId,
|
|
856
|
-
model: altResult.llmResult.model,
|
|
857
|
-
promptTokens: altResult.llmResult.promptTokens,
|
|
858
|
-
completionTokens: altResult.llmResult.completionTokens,
|
|
859
|
-
});
|
|
860
|
-
}
|
|
861
|
-
}
|
|
862
|
-
catch {
|
|
863
|
-
// Alt text generation failed — non-fatal
|
|
864
|
-
}
|
|
865
|
-
}
|
|
810
|
+
const buffer = await takeCaptureBuffer(adapter, opcode);
|
|
866
811
|
// Extract page favicon for browser bar mockup
|
|
867
812
|
let tabIconData;
|
|
868
813
|
let tabIconMimeType;
|
|
@@ -892,7 +837,7 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
|
|
|
892
837
|
captureId: opcode.captureId,
|
|
893
838
|
captureName: opcode.captureName ?? opcode.description,
|
|
894
839
|
elementSelector: opcode.elementSelector,
|
|
895
|
-
altText,
|
|
840
|
+
altText: undefined,
|
|
896
841
|
stepDescription: opcode.description,
|
|
897
842
|
stepIndex: opcodeIndex,
|
|
898
843
|
variantId: currentVariant?.id,
|
|
@@ -901,7 +846,11 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
|
|
|
901
846
|
lowConfidence: captureLowConfidence || undefined,
|
|
902
847
|
lowConfidenceReason: captureLowConfidence ? captureLowConfidenceReason : undefined,
|
|
903
848
|
});
|
|
904
|
-
return { success: true };
|
|
849
|
+
// Deliver the screenshot artifact NOW. LLM enrichment (quality
|
|
850
|
+
// verification + alt text) runs afterwards in `executeOpcode`, off this
|
|
851
|
+
// timed action path, and mutates the artifact in place — it can never
|
|
852
|
+
// void a captured screenshot or push the action past its timeout.
|
|
853
|
+
return { success: true, captureArtifactIndex: artifacts.length - 1, captureUrl };
|
|
905
854
|
}
|
|
906
855
|
case 'BEGIN_CLIP': {
|
|
907
856
|
if (executionState.activeClip) {
|
|
@@ -967,6 +916,127 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
|
|
|
967
916
|
};
|
|
968
917
|
}
|
|
969
918
|
}
|
|
919
|
+
// ── Capture: deterministic capture + best-effort enrichment ─────────
|
|
920
|
+
/**
|
|
921
|
+
* Re-stabilize budget for the verification retry. Sized for the post-action
|
|
922
|
+
* enrichment budget (under the ~30s global deadline), not the tight per-opcode
|
|
923
|
+
* action timeout — it only needs to clear transient skeletons/spinners before a
|
|
924
|
+
* second look. Was 8000ms when this ran inside the 10s action; that starved the
|
|
925
|
+
* capture and is exactly what this separation removes.
|
|
926
|
+
*/
|
|
927
|
+
const VERIFY_RETRY_STABILIZE_MS = 3000;
|
|
928
|
+
/** Visual stabilization shared by the capture action and the verify retry.
|
|
929
|
+
* Never fails the capture — a page that never settles is captured anyway. */
|
|
930
|
+
function stabilizeForCapture(adapter, maxWaitMs) {
|
|
931
|
+
return adapter.waitForVisuallyStable
|
|
932
|
+
? adapter.waitForVisuallyStable({ maxWaitMs })
|
|
933
|
+
: smartWaitForStability(adapter, { maxWaitMs }).then((r) => ({
|
|
934
|
+
stable: r.stable,
|
|
935
|
+
reason: r.waitedFor.join(', ') || 'unknown',
|
|
936
|
+
}));
|
|
937
|
+
}
|
|
938
|
+
/** Take the screenshot buffer for a CAPTURE_SCREENSHOT opcode (element clip or
|
|
939
|
+
* full page). Shared by the capture action and the verify retry. */
|
|
940
|
+
function takeCaptureBuffer(adapter, opcode) {
|
|
941
|
+
if (opcode.elementSelector && adapter.takeElementScreenshot) {
|
|
942
|
+
return adapter.takeElementScreenshot(opcode.elementSelector, opcode.outscale);
|
|
943
|
+
}
|
|
944
|
+
if (opcode.elementSelector) {
|
|
945
|
+
throw new Error(`element capture requires adapter support for selector "${opcode.elementSelector}"`);
|
|
946
|
+
}
|
|
947
|
+
return adapter.takeScreenshot();
|
|
948
|
+
}
|
|
949
|
+
function recordCaptureVerificationTelemetry(telemetry, llmResult) {
|
|
950
|
+
if (!llmResult)
|
|
951
|
+
return;
|
|
952
|
+
telemetry.llmCallCount++;
|
|
953
|
+
telemetry.llmCostEur += llmResult.costEur;
|
|
954
|
+
telemetry.llmStepUsages.push({
|
|
955
|
+
stepType: 'capture_verification',
|
|
956
|
+
generationId: llmResult.generationId,
|
|
957
|
+
model: llmResult.model,
|
|
958
|
+
promptTokens: llmResult.promptTokens,
|
|
959
|
+
completionTokens: llmResult.completionTokens,
|
|
960
|
+
});
|
|
961
|
+
}
|
|
962
|
+
/**
|
|
963
|
+
* Best-effort LLM enrichment of an already-delivered screenshot artifact:
|
|
964
|
+
* quality verification (with one budget-bounded re-stabilize + re-shoot) and
|
|
965
|
+
* alt text. Runs AFTER the deterministic capture action, off its timed path,
|
|
966
|
+
* and mutates the artifact in place. By construction it never changes
|
|
967
|
+
* `artifacts.length`, so it can never turn a captured screenshot into a
|
|
968
|
+
* "no artifact" failure — the regression this fixes.
|
|
969
|
+
*
|
|
970
|
+
* Bounded by the per-opcode global wait deadline. If the budget elapses
|
|
971
|
+
* mid-call the in-flight LLM promise is orphaned; `settled` makes any late
|
|
972
|
+
* in-place write a no-op so it cannot mutate an artifact the runner moved past.
|
|
973
|
+
*/
|
|
974
|
+
async function enrichCaptureArtifact(artifact, opcode, adapter, captureUrl, currentVariant, runOptions, telemetry, globalDeadlineMs) {
|
|
975
|
+
const llmConfig = runOptions.llmConfig;
|
|
976
|
+
if (!llmConfig)
|
|
977
|
+
return;
|
|
978
|
+
const budgetMs = getRemainingTimeMs(globalDeadlineMs);
|
|
979
|
+
if (budgetMs <= 0)
|
|
980
|
+
return;
|
|
981
|
+
let settled = false;
|
|
982
|
+
const verificationContext = {
|
|
983
|
+
expectedDescription: opcode.description,
|
|
984
|
+
url: captureUrl,
|
|
985
|
+
locale: currentVariant?.locale,
|
|
986
|
+
theme: currentVariant?.theme,
|
|
987
|
+
};
|
|
988
|
+
try {
|
|
989
|
+
await withTimeout(async () => {
|
|
990
|
+
const verification = await verifyCaptureQuality(artifact.buffer, verificationContext, llmConfig);
|
|
991
|
+
recordCaptureVerificationTelemetry(telemetry, verification.llmResult);
|
|
992
|
+
// On a failed verdict, give the page a brief settle and re-shoot once,
|
|
993
|
+
// bounded by the remaining global budget. Swap the buffer in place only if
|
|
994
|
+
// the second shot verifies clean.
|
|
995
|
+
if (!verification.passed) {
|
|
996
|
+
const retryStabilizeMs = Math.min(VERIFY_RETRY_STABILIZE_MS, getRemainingTimeMs(globalDeadlineMs));
|
|
997
|
+
if (retryStabilizeMs > 0) {
|
|
998
|
+
await stabilizeForCapture(adapter, retryStabilizeMs);
|
|
999
|
+
const retryBuffer = await takeCaptureBuffer(adapter, opcode);
|
|
1000
|
+
const retryVerification = await verifyCaptureQuality(retryBuffer, verificationContext, llmConfig);
|
|
1001
|
+
recordCaptureVerificationTelemetry(telemetry, retryVerification.llmResult);
|
|
1002
|
+
if (retryVerification.passed && !settled) {
|
|
1003
|
+
artifact.buffer = retryBuffer;
|
|
1004
|
+
}
|
|
1005
|
+
}
|
|
1006
|
+
}
|
|
1007
|
+
try {
|
|
1008
|
+
const altResult = await generateAltText(artifact.buffer, {
|
|
1009
|
+
description: opcode.description,
|
|
1010
|
+
url: captureUrl,
|
|
1011
|
+
locale: currentVariant?.locale,
|
|
1012
|
+
presetName: runOptions.presetName,
|
|
1013
|
+
}, llmConfig);
|
|
1014
|
+
if (!settled)
|
|
1015
|
+
artifact.altText = altResult.altText;
|
|
1016
|
+
if (altResult.llmResult) {
|
|
1017
|
+
telemetry.llmCallCount++;
|
|
1018
|
+
telemetry.llmCostEur += altResult.llmResult.costEur;
|
|
1019
|
+
telemetry.llmStepUsages.push({
|
|
1020
|
+
stepType: 'alt_text_generation',
|
|
1021
|
+
generationId: altResult.llmResult.generationId,
|
|
1022
|
+
model: altResult.llmResult.model,
|
|
1023
|
+
promptTokens: altResult.llmResult.promptTokens,
|
|
1024
|
+
completionTokens: altResult.llmResult.completionTokens,
|
|
1025
|
+
});
|
|
1026
|
+
}
|
|
1027
|
+
}
|
|
1028
|
+
catch {
|
|
1029
|
+
// Alt text generation failed — non-fatal.
|
|
1030
|
+
}
|
|
1031
|
+
}, budgetMs);
|
|
1032
|
+
}
|
|
1033
|
+
catch (err) {
|
|
1034
|
+
logger.debug(`[opcode CAPTURE_SCREENSHOT] enrichment skipped: ${err instanceof Error ? err.message : String(err)}`);
|
|
1035
|
+
}
|
|
1036
|
+
finally {
|
|
1037
|
+
settled = true;
|
|
1038
|
+
}
|
|
1039
|
+
}
|
|
970
1040
|
/**
|
|
971
1041
|
* Snapshot per-opcode timing + element bbox before the action runs. Returns
|
|
972
1042
|
* null when no timing should be emitted (mediaMode != video, or no active
|