autokap 1.9.2 → 1.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -217,12 +217,51 @@ export async function runCapture(options) {
217
217
  // badge on the preset while it captures. Best-effort + local-only: skip dry
218
218
  // runs and cloud runs (AUTOKAP_RUN_ID is set on cloud runners, which own
219
219
  // their own capture_runs row). A failure here must never block the capture.
220
- if (!options.dryRun && !process.env.AUTOKAP_RUN_ID) {
220
+ const isLocalRegisteredRun = !options.dryRun && !process.env.AUTOKAP_RUN_ID;
221
+ if (isLocalRegisteredRun) {
221
222
  await postRunStart(config, runId, program.presetId, program.variants.length, options.env);
222
223
  }
224
+ // Ctrl-C handling: a local run that registered itself server-side must never
225
+ // leave the preset stuck on the "En cours" badge. On SIGINT/SIGTERM we abort
226
+ // the in-flight capture (so the finally-block cleanup runs) and best-effort
227
+ // tell the server the run was interrupted — otherwise the capture_runs row
228
+ // sits in `running` and the preset stays "capturing" until the 15-min stale
229
+ // cutoff. A caller-supplied abortSignal is forwarded into the same controller
230
+ // so both paths converge on one cancellation.
231
+ const abortController = new AbortController();
232
+ if (options.abortSignal) {
233
+ if (options.abortSignal.aborted) {
234
+ abortController.abort(options.abortSignal.reason);
235
+ }
236
+ else {
237
+ options.abortSignal.addEventListener('abort', () => abortController.abort(options.abortSignal.reason), { once: true });
238
+ }
239
+ }
240
+ let interrupted = false;
241
+ let abortNotified = false;
242
+ const notifyAborted = async () => {
243
+ if (abortNotified || !isLocalRegisteredRun)
244
+ return;
245
+ abortNotified = true;
246
+ await postRunAborted(config, runId, program.presetId, options.env);
247
+ };
248
+ const onInterrupt = (signal) => {
249
+ if (interrupted) {
250
+ // Second interrupt: the user wants out now. Hard-exit with the
251
+ // conventional 128+SIGINT(2) code; the best-effort notify already fired.
252
+ process.exit(130);
253
+ }
254
+ interrupted = true;
255
+ logger.warn(`[capture] ${signal} received — stopping the run and notifying AutoKap…`);
256
+ abortController.abort(new Error('User interrupted (Ctrl-C)'));
257
+ };
258
+ if (isLocalRegisteredRun) {
259
+ process.on('SIGINT', onInterrupt);
260
+ process.on('SIGTERM', onInterrupt);
261
+ }
223
262
  const runOptions = {
224
263
  recoveryChain,
225
- abortSignal: options.abortSignal,
264
+ abortSignal: abortController.signal,
226
265
  maxParallelVariants,
227
266
  llmConfig,
228
267
  presetName: program.presetId,
@@ -292,15 +331,30 @@ export async function runCapture(options) {
292
331
  };
293
332
  let runResult;
294
333
  let cliResult;
334
+ let runAborted = false;
295
335
  try {
296
336
  runResult = await executeProgram(program, createAdapter, runOptions);
337
+ runAborted = interrupted || runResult.error === 'aborted';
297
338
  if (runResult.success) {
298
339
  logger.info(`[capture] Run completed successfully — ${runResult.telemetry.totalOpcodes} opcodes, ${runResult.telemetry.recoveredOpcodes} recovered, ${runResult.totalDurationMs}ms`);
299
340
  }
341
+ else if (runAborted) {
342
+ logger.warn('[capture] Run interrupted by the user — skipping artifact upload.');
343
+ }
300
344
  else {
301
345
  logger.error(`[capture] Run failed: ${runResult.error}`);
302
346
  }
303
- if (options.dryRun) {
347
+ if (runAborted) {
348
+ // Don't upload partial artifacts for a cancelled run; the server is told
349
+ // separately (notifyAborted) so the preset leaves the "capturing" state.
350
+ cliResult = {
351
+ success: false,
352
+ runId,
353
+ runResult,
354
+ error: 'Capture interrupted (Ctrl-C)',
355
+ };
356
+ }
357
+ else if (options.dryRun) {
304
358
  logger.info(`[capture] DRY RUN complete — ${runResult.telemetry.totalOpcodes} opcodes executed, 0 captures, 0 credits charged`);
305
359
  cliResult = { success: runResult.success, runId, runResult };
306
360
  }
@@ -354,6 +408,16 @@ export async function runCapture(options) {
354
408
  }
355
409
  }
356
410
  finally {
411
+ if (isLocalRegisteredRun) {
412
+ process.off('SIGINT', onInterrupt);
413
+ process.off('SIGTERM', onInterrupt);
414
+ }
415
+ // On interruption, mark the run terminal server-side regardless of the
416
+ // user's debug-log preference, so the preset never stays stuck "capturing".
417
+ // Idempotent with the error-log flush below (same capture_failed dedupeKey).
418
+ if (runAborted) {
419
+ await notifyAborted();
420
+ }
357
421
  // AUT-149: export structured debug logs to AutoKap on capture failure.
358
422
  // Best-effort — the LogCollector swallows network errors.
359
423
  const shouldExport = options.exportDebugLogs !== false
@@ -693,6 +757,29 @@ async function postRunStart(config, runId, presetId, variantCount, env) {
693
757
  logger.warn(`[capture] Run registration error: ${message}`);
694
758
  }
695
759
  }
760
+ // Best-effort terminal notification when a local run is interrupted (Ctrl-C).
761
+ // Marks the run failed server-side and clears the preset's "capturing" badge,
762
+ // so an aborted capture can't leave the preset stuck "En cours". Never throws.
763
+ async function postRunAborted(config, runId, presetId, env) {
764
+ try {
765
+ const response = await fetch(`${config.apiBaseUrl}/api/cli/runs`, {
766
+ method: 'PATCH',
767
+ headers: {
768
+ 'Authorization': `Bearer ${config.apiKey}`,
769
+ 'Content-Type': 'application/json',
770
+ },
771
+ body: JSON.stringify({ runId, presetId, status: 'aborted', env }),
772
+ signal: AbortSignal.timeout(10_000),
773
+ });
774
+ if (!response.ok) {
775
+ logger.warn(`[capture] Failed to report interruption (HTTP ${response.status}); the preset may show "in progress" until the stale cutoff`);
776
+ }
777
+ }
778
+ catch (err) {
779
+ const message = err instanceof Error ? err.message : String(err);
780
+ logger.warn(`[capture] Interruption report error: ${message}`);
781
+ }
782
+ }
696
783
  async function uploadResults(config, program, result, runId, sessionId, provenance) {
697
784
  const artifactJobs = result.variantResults.flatMap((variant) => {
698
785
  const variantSpec = program.variants.find((entry) => entry.id === variant.variantId);
@@ -600,10 +600,11 @@ export interface ExecutionProgram {
600
600
  */
601
601
  deviceConfigs?: Record<string, DeviceConfig>;
602
602
  /**
603
- * Project-level public URL used to decorate browser mockups. The CLI
603
+ * Project-level decorative URL used to decorate browser mockups. The CLI
604
604
  * substitutes the captured origin (typically a local dev server) with this
605
605
  * value via `transformBrowserUrl` before baking it into the browser bar.
606
- * Server-resolved from `projects.public_url`.
606
+ * AUT-269: derived automatically from the project's prod environment base
607
+ * URL (absent when no prod environment is configured), not a separate field.
607
608
  */
608
609
  publicUrl?: string;
609
610
  /**
@@ -54,5 +54,15 @@ export interface OpcodeActionResult {
54
54
  * click instead of when the cursor was still travelling.
55
55
  */
56
56
  clickTimestampsMs?: number[];
57
+ /**
58
+ * For CAPTURE_SCREENSHOT: index in the variant's `artifacts` array of the
59
+ * screenshot this action just delivered. The deterministic capture pushes the
60
+ * artifact immediately; LLM enrichment (quality verification + alt text) runs
61
+ * afterwards in `executeOpcode` and mutates this artifact in place, so it can
62
+ * never void an already-captured screenshot or trip the action timeout.
63
+ */
64
+ captureArtifactIndex?: number;
65
+ /** For CAPTURE_SCREENSHOT: page URL at capture time, reused by enrichment. */
66
+ captureUrl?: string;
57
67
  }
58
68
  export declare function executeOpcodeCoreAction(opcode: ExecutionOpcode, adapter: RuntimeAdapter, context?: OpcodeActionContext): Promise<OpcodeActionResult>;
@@ -55,6 +55,15 @@ export class NoOpRecoveryChain {
55
55
  }
56
56
  const MIN_CLIP_FINALIZATION_TIMEOUT_MS = 30000;
57
57
  const DEFAULT_VIDEO_RECORDING_RESOLUTION = { width: 1920, height: 1080 };
58
+ /**
59
+ * The compiled per-opcode action budget. For CAPTURE_SCREENSHOT this governs
60
+ * ONLY the deterministic capture (visual stabilize + screenshot + favicon/title
61
+ * + artifact push); LLM enrichment (quality verification + alt text) runs AFTER
62
+ * the action under the global wait deadline (see `enrichCaptureArtifact`) and
63
+ * must never be folded back under this timeout — that is the regression this
64
+ * separation prevents. END_CLIP finalization gets a floor since muxing a
65
+ * recording is inherently slow.
66
+ */
58
67
  function resolveOpcodeTimeoutMs(opcode) {
59
68
  if (opcode.kind === 'END_CLIP') {
60
69
  return Math.max(opcode.timeoutMs, MIN_CLIP_FINALIZATION_TIMEOUT_MS);
@@ -156,8 +165,31 @@ export async function executeProgram(program, createAdapter, options = {}) {
156
165
  });
157
166
  await Promise.all(workers);
158
167
  const completedVariantResults = variantResults.filter((result) => Boolean(result));
159
- const aborted = options.abortSignal?.aborted && completedVariantResults.length < program.variants.length;
160
- const success = !aborted && completedVariantResults.length > 0 && completedVariantResults.every(v => v.success);
168
+ const aborted = Boolean(options.abortSignal?.aborted) && completedVariantResults.length < program.variants.length;
169
+ // Fail-closed on incomplete delivery. Previously `success` was computed purely
170
+ // from the SURVIVING variant results, so a run that silently dropped variants
171
+ // (a variant finishing "ok" while persisting no artifact) was recorded as a
172
+ // clean success — a 4-of-24 capture looked identical to a full run, got billed,
173
+ // was never retried, and surfaced nothing. Require that every variant ran,
174
+ // succeeded, AND produced its expected number of artifacts; name the deficient
175
+ // variants so the failure is actionable and the runtime cause is diagnosable
176
+ // straight from the run log. Dry runs intentionally skip capture opcodes (and
177
+ // programs with no capture points have nothing to enforce), so they are exempt
178
+ // from the artifact check but still require every variant to succeed.
179
+ const expectedArtifactsPerVariant = program.steps.filter((step) => isArtifactProducingOpcode(step.kind)).length;
180
+ const enforceArtifactCompleteness = !options.dryRun && expectedArtifactsPerVariant > 0;
181
+ const deficientVariants = program.variants
182
+ .map((variant, index) => ({ variant, result: variantResults[index] }))
183
+ .filter(({ result }) => !result
184
+ || !result.success
185
+ || (enforceArtifactCompleteness && result.artifacts.length < expectedArtifactsPerVariant))
186
+ .map(({ variant, result }) => !result
187
+ ? `${variant.id} (not executed)`
188
+ : !result.success
189
+ ? `${variant.id} (failed: ${result.error ?? 'unknown error'})`
190
+ : `${variant.id} (${result.artifacts.length}/${expectedArtifactsPerVariant} artifacts)`);
191
+ const incompleteDelivery = deficientVariants.length > 0;
192
+ const success = !aborted && completedVariantResults.length > 0 && !incompleteDelivery;
161
193
  const detectedAppVersion = completedVariantResults.reduce((acc, variantResult) => acc ?? (variantResult.detectedAppVersion ?? null), null);
162
194
  // AUT-241 — surface (don't mask) cuts: aggregate every recording warning from
163
195
  // each variant's clip/video artifacts. Diagnostic only; never affects success.
@@ -172,7 +204,11 @@ export async function executeProgram(program, createAdapter, options = {}) {
172
204
  totalDurationMs: Date.now() - startTime,
173
205
  detectedAppVersion,
174
206
  warnings: aggregatedWarnings.length ? aggregatedWarnings : undefined,
175
- error: aborted ? 'aborted' : (success ? undefined : completedVariantResults.find(v => !v.success)?.error),
207
+ error: aborted
208
+ ? 'aborted'
209
+ : success
210
+ ? undefined
211
+ : `incomplete run: ${deficientVariants.length}/${program.variants.length} variant(s) did not deliver expected artifacts — ${deficientVariants.join('; ')}`,
176
212
  failureKind: success ? undefined : completedVariantResults.find(v => v.failureKind)?.failureKind,
177
213
  };
178
214
  }
@@ -320,6 +356,13 @@ function softSkipResult(opcode, index, startTime, reason, telemetry) {
320
356
  error: reason,
321
357
  };
322
358
  }
359
+ /** Opcodes whose ACTION produces a persisted artifact (a screenshot or a finalized
360
+ * video clip). A passing postcondition does NOT imply the artifact exists, so these
361
+ * get special handling on the recovery path (executeOpcode → failWithRecovery) and
362
+ * in the run-level completeness gate (executeProgram). */
363
+ function isArtifactProducingOpcode(kind) {
364
+ return kind === 'CAPTURE_SCREENSHOT' || kind === 'END_CLIP';
365
+ }
323
366
  async function executeOpcode(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, opcodeTimings, artifacts, options, variantId, executionState, artifactPlan, mockDataGroups, currentVariant, credentials) {
324
367
  const startTime = Date.now();
325
368
  const effectiveTimeoutMs = resolveOpcodeTimeoutMs(opcode);
@@ -333,6 +376,47 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
333
376
  const getProgress = makeProgressGetter(adapter);
334
377
  const actionEffectPolicy = getOpcodeActionEffectPolicy(opcode);
335
378
  const isSoft = isSoftOpcodeKind(opcode.kind);
379
+ // Snapshot so we can tell whether THIS opcode produced its artifact. A recovered
380
+ // CAPTURE_SCREENSHOT must not pass as a phantom success when no screenshot was
381
+ // taken — the postcondition (e.g. element_visible) can pass without a capture.
382
+ const artifactCountAtStart = artifacts.length;
383
+ // Wraps handleFailure: when recovery succeeds for an artifact-producing opcode
384
+ // that pushed NO artifact for itself, re-run the capture once so the artifact
385
+ // truly exists; hard-fail if it still can't. END_CLIP finalization is stateful,
386
+ // so it hard-fails without re-running. Without this a "recovered"
387
+ // CAPTURE_SCREENSHOT finishes ok with no screenshot — the silent partial loss.
388
+ const failWithRecovery = async (reason) => {
389
+ const failureResult = await handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
390
+ if (failureResult.status !== 'recovered'
391
+ || !isArtifactProducingOpcode(opcode.kind)
392
+ || artifacts.length > artifactCountAtStart) {
393
+ return failureResult;
394
+ }
395
+ if (opcode.kind === 'CAPTURE_SCREENSHOT') {
396
+ const recaptureBudgetMs = getRemainingTimeMs(globalDeadlineMs);
397
+ if (recaptureBudgetMs > 0) {
398
+ let recapture;
399
+ try {
400
+ recapture = await withTimeout(() => executeOpcodeAction(opcode, index, adapter, artifacts, telemetry, currentVariant, executionState, artifactPlan, mockDataGroups, options, credentials), recaptureBudgetMs);
401
+ }
402
+ catch (err) {
403
+ recapture = { success: false, error: err instanceof Error ? err.message : String(err) };
404
+ }
405
+ if (recapture.success && artifacts.length > artifactCountAtStart) {
406
+ logger.debug(`[opcode ${index}] re-captured screenshot after recovery (${reason})`);
407
+ return failureResult;
408
+ }
409
+ }
410
+ }
411
+ return {
412
+ opcodeIndex: index,
413
+ kind: opcode.kind,
414
+ status: 'failed',
415
+ durationMs: Date.now() - startTime,
416
+ recoveryAttempts: failureResult.recoveryAttempts ?? 1,
417
+ error: `recovery succeeded but produced no ${opcode.kind === 'END_CLIP' ? 'clip' : 'screenshot'} artifact: ${reason}`,
418
+ };
419
+ };
336
420
  // Track page context for circuit breaker
337
421
  try {
338
422
  const url = await adapter.getCurrentUrl();
@@ -358,7 +442,7 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
358
442
  logger.debug(`[opcode ${index}] no budget left after captureBeforeState (deadline=${actionDeadlineMs}, now=${Date.now()})`);
359
443
  if (isSoft)
360
444
  return softSkipResult(opcode, index, startTime, reason, telemetry);
361
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
445
+ return failWithRecovery(reason);
362
446
  }
363
447
  // For mediaMode='video', capture pre-action timing + bbox metadata inside
364
448
  // the active clip window only. Opcodes outside a clip are not part of the
@@ -407,7 +491,7 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
407
491
  const reason = result.error ?? 'action failed';
408
492
  if (isSoft)
409
493
  return softSkipResult(opcode, index, startTime, reason, telemetry);
410
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
494
+ return failWithRecovery(reason);
411
495
  }
412
496
  // Verify postcondition — extend-on-progress up to the global deadline so a
413
497
  // slow action no longer starves it (failure mode #3: clamped to ~1ms).
@@ -417,7 +501,7 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
417
501
  logger.debug(`[opcode ${index}] no budget left for postcondition check`);
418
502
  if (isSoft)
419
503
  return softSkipResult(opcode, index, startTime, reason, telemetry);
420
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
504
+ return failWithRecovery(reason);
421
505
  }
422
506
  const runtimePostcondition = resolveRuntimePostcondition(opcode);
423
507
  const postStart = Date.now();
@@ -430,7 +514,7 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
430
514
  const reason = `postcondition failed: ${postcondition.reason}`;
431
515
  if (isSoft)
432
516
  return softSkipResult(opcode, index, startTime, reason, telemetry);
433
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, reason);
517
+ return failWithRecovery(reason);
434
518
  }
435
519
  // Verify action effects through the shared policy. Weak `any_change`
436
520
  // postconditions are only meaningful if this verifier observes a real
@@ -446,7 +530,7 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
446
530
  `postcondition passed, treating as redundant-but-successful`);
447
531
  }
448
532
  else {
449
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, `action had no effect: ${verification.summary}`);
533
+ return failWithRecovery(`action had no effect: ${verification.summary}`);
450
534
  }
451
535
  }
452
536
  }
@@ -468,6 +552,18 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
468
552
  await sleep(VIDEO_POST_ACTION_SETTLE_MS);
469
553
  }
470
554
  breaker.recordSuccess(index);
555
+ // Enrich the just-captured screenshot OFF the timed action path: quality
556
+ // verification + alt text run here, under the global wait deadline, and
557
+ // mutate the artifact in place — so slow LLM calls can never void a
558
+ // delivered screenshot (the failure this fixes). Recaptured screenshots from
559
+ // the recovery path are intentionally left un-enriched (recovery stays
560
+ // deterministic and fast).
561
+ if (opcode.kind === 'CAPTURE_SCREENSHOT'
562
+ && options.llmConfig
563
+ && result.captureArtifactIndex !== undefined
564
+ && result.captureUrl !== undefined) {
565
+ await enrichCaptureArtifact(artifacts[result.captureArtifactIndex], opcode, adapter, result.captureUrl, currentVariant, options, telemetry, globalDeadlineMs);
566
+ }
471
567
  return {
472
568
  opcodeIndex: index,
473
569
  kind: opcode.kind,
@@ -480,7 +576,7 @@ async function executeOpcode(opcode, index, adapter, verifier, breaker, recovery
480
576
  const errorMsg = err instanceof Error ? err.message : String(err);
481
577
  if (isSoft)
482
578
  return softSkipResult(opcode, index, startTime, errorMsg, telemetry);
483
- return handleFailure(opcode, index, adapter, verifier, breaker, recoveryChain, telemetry, healerPatches, options, executionState, variantId, currentVariant, startTime, deadlineMs, globalDeadlineMs, effectiveTimeoutMs, errorMsg);
579
+ return failWithRecovery(errorMsg);
484
580
  }
485
581
  }
486
582
  /** Post-action breathing room (ms) injected between visible interactions
@@ -685,13 +781,7 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
685
781
  // semantic loaders/DOM-quiet + bounded pixel fallback); fall back to the
686
782
  // legacy smart-wait for adapters that don't implement it. A page that
687
783
  // never fully settles (e.g. a perpetual animation) is captured anyway.
688
- const stabilize = (maxWaitMs) => adapter.waitForVisuallyStable
689
- ? adapter.waitForVisuallyStable({ maxWaitMs })
690
- : smartWaitForStability(adapter, { maxWaitMs }).then((r) => ({
691
- stable: r.stable,
692
- reason: r.waitedFor.join(', ') || 'unknown',
693
- }));
694
- const stability = await stabilize(5000);
784
+ const stability = await stabilizeForCapture(adapter, 5000);
695
785
  if (!stability.stable) {
696
786
  logger.debug(`[opcode ${opcodeIndex}] capturing despite unstable page: ${stability.reason}`);
697
787
  }
@@ -709,85 +799,7 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
709
799
  }
710
800
  const captureLowConfidenceReason = lowConfidenceReasons.join('; ') || undefined;
711
801
  const captureUrl = await adapter.getCurrentUrl();
712
- const takeBuffer = async () => {
713
- if (opcode.elementSelector && adapter.takeElementScreenshot) {
714
- return adapter.takeElementScreenshot(opcode.elementSelector, opcode.outscale);
715
- }
716
- if (opcode.elementSelector) {
717
- throw new Error(`element capture requires adapter support for selector "${opcode.elementSelector}"`);
718
- }
719
- return adapter.takeScreenshot();
720
- };
721
- let buffer = await takeBuffer();
722
- if (runOptions?.llmConfig) {
723
- const verification = await verifyCaptureQuality(buffer, {
724
- expectedDescription: opcode.description,
725
- url: captureUrl,
726
- locale: currentVariant?.locale,
727
- theme: currentVariant?.theme,
728
- }, runOptions.llmConfig);
729
- if (verification.llmResult) {
730
- telemetry.llmCallCount++;
731
- telemetry.llmCostEur += verification.llmResult.costEur;
732
- telemetry.llmStepUsages.push({
733
- stepType: 'capture_verification',
734
- generationId: verification.llmResult.generationId,
735
- model: verification.llmResult.model,
736
- promptTokens: verification.llmResult.promptTokens,
737
- completionTokens: verification.llmResult.completionTokens,
738
- });
739
- }
740
- if (!verification.passed) {
741
- await stabilize(8000);
742
- const retryBuffer = await takeBuffer();
743
- const retryVerification = await verifyCaptureQuality(retryBuffer, {
744
- expectedDescription: opcode.description,
745
- url: captureUrl,
746
- locale: currentVariant?.locale,
747
- theme: currentVariant?.theme,
748
- }, runOptions.llmConfig);
749
- if (retryVerification.llmResult) {
750
- telemetry.llmCallCount++;
751
- telemetry.llmCostEur += retryVerification.llmResult.costEur;
752
- telemetry.llmStepUsages.push({
753
- stepType: 'capture_verification',
754
- generationId: retryVerification.llmResult.generationId,
755
- model: retryVerification.llmResult.model,
756
- promptTokens: retryVerification.llmResult.promptTokens,
757
- completionTokens: retryVerification.llmResult.completionTokens,
758
- });
759
- }
760
- if (retryVerification.passed) {
761
- buffer = retryBuffer;
762
- }
763
- }
764
- }
765
- let altText;
766
- if (runOptions?.llmConfig) {
767
- try {
768
- const altResult = await generateAltText(buffer, {
769
- description: opcode.description,
770
- url: captureUrl,
771
- locale: currentVariant?.locale,
772
- presetName: runOptions.presetName,
773
- }, runOptions.llmConfig);
774
- altText = altResult.altText;
775
- if (altResult.llmResult) {
776
- telemetry.llmCallCount++;
777
- telemetry.llmCostEur += altResult.llmResult.costEur;
778
- telemetry.llmStepUsages.push({
779
- stepType: 'alt_text_generation',
780
- generationId: altResult.llmResult.generationId,
781
- model: altResult.llmResult.model,
782
- promptTokens: altResult.llmResult.promptTokens,
783
- completionTokens: altResult.llmResult.completionTokens,
784
- });
785
- }
786
- }
787
- catch {
788
- // Alt text generation failed — non-fatal
789
- }
790
- }
802
+ const buffer = await takeCaptureBuffer(adapter, opcode);
791
803
  // Extract page favicon for browser bar mockup
792
804
  let tabIconData;
793
805
  let tabIconMimeType;
@@ -817,7 +829,7 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
817
829
  captureId: opcode.captureId,
818
830
  captureName: opcode.captureName ?? opcode.description,
819
831
  elementSelector: opcode.elementSelector,
820
- altText,
832
+ altText: undefined,
821
833
  stepDescription: opcode.description,
822
834
  stepIndex: opcodeIndex,
823
835
  variantId: currentVariant?.id,
@@ -826,7 +838,11 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
826
838
  lowConfidence: captureLowConfidence || undefined,
827
839
  lowConfidenceReason: captureLowConfidence ? captureLowConfidenceReason : undefined,
828
840
  });
829
- break;
841
+ // Deliver the screenshot artifact NOW. LLM enrichment (quality
842
+ // verification + alt text) runs afterwards in `executeOpcode`, off this
843
+ // timed action path, and mutates the artifact in place — it can never
844
+ // void a captured screenshot or push the action past its timeout.
845
+ return { success: true, captureArtifactIndex: artifacts.length - 1, captureUrl };
830
846
  }
831
847
  case 'BEGIN_CLIP': {
832
848
  if (executionState.activeClip) {
@@ -892,6 +908,127 @@ async function executeOpcodeAction(opcode, opcodeIndex, adapter, artifacts, tele
892
908
  };
893
909
  }
894
910
  }
911
+ // ── Capture: deterministic capture + best-effort enrichment ─────────
912
+ /**
913
+ * Re-stabilize budget for the verification retry. Sized for the post-action
914
+ * enrichment budget (under the ~30s global deadline), not the tight per-opcode
915
+ * action timeout — it only needs to clear transient skeletons/spinners before a
916
+ * second look. Was 8000ms when this ran inside the 10s action; that starved the
917
+ * capture and is exactly what this separation removes.
918
+ */
919
+ const VERIFY_RETRY_STABILIZE_MS = 3000;
920
+ /** Visual stabilization shared by the capture action and the verify retry.
921
+ * Never fails the capture — a page that never settles is captured anyway. */
922
+ function stabilizeForCapture(adapter, maxWaitMs) {
923
+ return adapter.waitForVisuallyStable
924
+ ? adapter.waitForVisuallyStable({ maxWaitMs })
925
+ : smartWaitForStability(adapter, { maxWaitMs }).then((r) => ({
926
+ stable: r.stable,
927
+ reason: r.waitedFor.join(', ') || 'unknown',
928
+ }));
929
+ }
930
+ /** Take the screenshot buffer for a CAPTURE_SCREENSHOT opcode (element clip or
931
+ * full page). Shared by the capture action and the verify retry. */
932
+ function takeCaptureBuffer(adapter, opcode) {
933
+ if (opcode.elementSelector && adapter.takeElementScreenshot) {
934
+ return adapter.takeElementScreenshot(opcode.elementSelector, opcode.outscale);
935
+ }
936
+ if (opcode.elementSelector) {
937
+ throw new Error(`element capture requires adapter support for selector "${opcode.elementSelector}"`);
938
+ }
939
+ return adapter.takeScreenshot();
940
+ }
941
+ function recordCaptureVerificationTelemetry(telemetry, llmResult) {
942
+ if (!llmResult)
943
+ return;
944
+ telemetry.llmCallCount++;
945
+ telemetry.llmCostEur += llmResult.costEur;
946
+ telemetry.llmStepUsages.push({
947
+ stepType: 'capture_verification',
948
+ generationId: llmResult.generationId,
949
+ model: llmResult.model,
950
+ promptTokens: llmResult.promptTokens,
951
+ completionTokens: llmResult.completionTokens,
952
+ });
953
+ }
954
+ /**
955
+ * Best-effort LLM enrichment of an already-delivered screenshot artifact:
956
+ * quality verification (with one budget-bounded re-stabilize + re-shoot) and
957
+ * alt text. Runs AFTER the deterministic capture action, off its timed path,
958
+ * and mutates the artifact in place. By construction it never changes
959
+ * `artifacts.length`, so it can never turn a captured screenshot into a
960
+ * "no artifact" failure — the regression this fixes.
961
+ *
962
+ * Bounded by the per-opcode global wait deadline. If the budget elapses
963
+ * mid-call the in-flight LLM promise is orphaned; `settled` makes any late
964
+ * in-place write a no-op so it cannot mutate an artifact the runner moved past.
965
+ */
966
+ async function enrichCaptureArtifact(artifact, opcode, adapter, captureUrl, currentVariant, runOptions, telemetry, globalDeadlineMs) {
967
+ const llmConfig = runOptions.llmConfig;
968
+ if (!llmConfig)
969
+ return;
970
+ const budgetMs = getRemainingTimeMs(globalDeadlineMs);
971
+ if (budgetMs <= 0)
972
+ return;
973
+ let settled = false;
974
+ const verificationContext = {
975
+ expectedDescription: opcode.description,
976
+ url: captureUrl,
977
+ locale: currentVariant?.locale,
978
+ theme: currentVariant?.theme,
979
+ };
980
+ try {
981
+ await withTimeout(async () => {
982
+ const verification = await verifyCaptureQuality(artifact.buffer, verificationContext, llmConfig);
983
+ recordCaptureVerificationTelemetry(telemetry, verification.llmResult);
984
+ // On a failed verdict, give the page a brief settle and re-shoot once,
985
+ // bounded by the remaining global budget. Swap the buffer in place only if
986
+ // the second shot verifies clean.
987
+ if (!verification.passed) {
988
+ const retryStabilizeMs = Math.min(VERIFY_RETRY_STABILIZE_MS, getRemainingTimeMs(globalDeadlineMs));
989
+ if (retryStabilizeMs > 0) {
990
+ await stabilizeForCapture(adapter, retryStabilizeMs);
991
+ const retryBuffer = await takeCaptureBuffer(adapter, opcode);
992
+ const retryVerification = await verifyCaptureQuality(retryBuffer, verificationContext, llmConfig);
993
+ recordCaptureVerificationTelemetry(telemetry, retryVerification.llmResult);
994
+ if (retryVerification.passed && !settled) {
995
+ artifact.buffer = retryBuffer;
996
+ }
997
+ }
998
+ }
999
+ try {
1000
+ const altResult = await generateAltText(artifact.buffer, {
1001
+ description: opcode.description,
1002
+ url: captureUrl,
1003
+ locale: currentVariant?.locale,
1004
+ presetName: runOptions.presetName,
1005
+ }, llmConfig);
1006
+ if (!settled)
1007
+ artifact.altText = altResult.altText;
1008
+ if (altResult.llmResult) {
1009
+ telemetry.llmCallCount++;
1010
+ telemetry.llmCostEur += altResult.llmResult.costEur;
1011
+ telemetry.llmStepUsages.push({
1012
+ stepType: 'alt_text_generation',
1013
+ generationId: altResult.llmResult.generationId,
1014
+ model: altResult.llmResult.model,
1015
+ promptTokens: altResult.llmResult.promptTokens,
1016
+ completionTokens: altResult.llmResult.completionTokens,
1017
+ });
1018
+ }
1019
+ }
1020
+ catch {
1021
+ // Alt text generation failed — non-fatal.
1022
+ }
1023
+ }, budgetMs);
1024
+ }
1025
+ catch (err) {
1026
+ logger.debug(`[opcode CAPTURE_SCREENSHOT] enrichment skipped: ${err instanceof Error ? err.message : String(err)}`);
1027
+ }
1028
+ finally {
1029
+ settled = true;
1030
+ }
1031
+ }
895
1032
  /**
896
1033
  * Snapshot per-opcode timing + element bbox before the action runs. Returns
897
1034
  * null when no timing should be emitted (mediaMode != video, or no active
@@ -100,8 +100,8 @@ function compileRoutePattern(pattern) {
100
100
  // Support glob-like patterns: ** matches anything (incl. slashes / empty),
101
101
  // * matches a single path segment, ? matches one non-slash char.
102
102
  // Tokenize in one pass so the `*` rewrite doesn't clobber the `*` produced
103
- // by the `**` rewrite (e.g. `/home**` must compile to `^/home.*$`, not
104
- // `^/home.[^/]*$` which would reject `/home` itself).
103
+ // by the `**` rewrite (e.g. `/home**` must compile to `/home.*`, not
104
+ // `/home.[^/]*` which would reject `/home` itself).
105
105
  let regexStr = '';
106
106
  for (let i = 0; i < pattern.length; i++) {
107
107
  const ch = pattern[i];
@@ -122,7 +122,14 @@ function compileRoutePattern(pattern) {
122
122
  regexStr += ch;
123
123
  }
124
124
  }
125
- return new RegExp(`^${regexStr}$`);
125
+ // Substring (contains) match — NOT anchored. Generated programs author bare
126
+ // patterns that are either a prefix of the real path (`/projects/` ⊂
127
+ // `/projects/<id>`) or a nested segment (`/tracking` ⊂
128
+ // `/projects/<id>/tracking`). An anchored `^…$` could match neither, which
129
+ // surfaced as a misleading "page stuck, no progress" failure after the
130
+ // navigation had actually succeeded. Callers needing strict matching pass an
131
+ // anchored regex (handled above).
132
+ return new RegExp(regexStr);
126
133
  }
127
134
  async function checkElementVisible(adapter, selector) {
128
135
  // Primary check: use Playwright waitFor (fast, reliable)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autokap",
3
- "version": "1.9.2",
3
+ "version": "1.9.4",
4
4
  "description": "AI-powered CLI tool for capturing clean screenshots of websites",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",