npm - visual-ai-assertions - Versions diffs - 0.5.0 → 0.7.0 - Mend

visual-ai-assertions 0.5.0 → 0.7.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/index.js CHANGED Viewed

@@ -20,6 +20,7 @@ var Model = {
   },
   Google: {
     GEMINI_3_1_PRO_PREVIEW: "gemini-3.1-pro-preview",
+    GEMINI_3_1_FLASH_LITE_PREVIEW: "gemini-3.1-flash-lite-preview",
     GEMINI_3_FLASH_PREVIEW: "gemini-3-flash-preview"
   }
 };
@@ -29,12 +30,18 @@ var DEFAULT_MODELS = {
   [Provider.GOOGLE]: Model.Google.GEMINI_3_FLASH_PREVIEW
 };
 var DEFAULT_MAX_TOKENS = 4096;
+var OPENAI_REASONING_MAX_TOKENS = 16384;
 var MODEL_TO_PROVIDER = new Map([
   ...Object.values(Model.Anthropic).map((m) => [m, Provider.ANTHROPIC]),
   ...Object.values(Model.OpenAI).map((m) => [m, Provider.OPENAI]),
   ...Object.values(Model.Google).map((m) => [m, Provider.GOOGLE])
 ]);
 var VALID_PROVIDERS = Object.values(Provider);
+var PROVIDER_DEFAULT_REASONING = {
+  openai: "medium",
+  anthropic: "off",
+  google: "off"
+};
 var Content = {
   /** Detects Lorem ipsum, TODO, TBD, and similar placeholder text */
   PLACEHOLDER_TEXT: "placeholder-text",
@@ -108,6 +115,16 @@ var VisualAIResponseParseError = class extends VisualAIError {
     this.rawResponse = rawResponse;
   }
 };
+var VisualAITruncationError = class extends VisualAIError {
+  partialResponse;
+  maxTokens;
+  constructor(message, partialResponse, maxTokens) {
+    super(message, "RESPONSE_TRUNCATED");
+    this.name = "VisualAITruncationError";
+    this.partialResponse = partialResponse;
+    this.maxTokens = maxTokens;
+  }
+};
 var VisualAIConfigError = class extends VisualAIError {
   constructor(message) {
     super(message, "CONFIG_INVALID");
@@ -123,7 +140,7 @@ var VisualAIAssertionError = class extends VisualAIError {
   }
 };
 function isVisualAIKnownError(error) {
-  return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
+  return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAITruncationError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
 }
 // src/core/prompt.ts
@@ -137,12 +154,18 @@ Each issue must have:
 - "description": what the issue is
 - "suggestion": how to fix or improve it
 `;
-var CHECK_OUTPUT_SCHEMA = `Respond with a JSON object matching this exact structure:
+var CHECK_OUTPUT_SCHEMA = `IMPORTANT: Follow this evaluation order:
+1. First, evaluate EACH statement independently and populate the "statements" array
+2. Then, set "pass" to true ONLY if every statement passed (logical AND of all statement results)
+3. Write "reasoning" as a brief overall summary of the evaluation
+4. Include "issues" only for statements that failed
+Respond with a JSON object matching this exact structure:
 {
-  "pass": boolean,          // true ONLY if ALL statements are true
-  "reasoning": string,      // brief overall summary (e.g. "3 of 4 checks passed...")
-  "issues": [...],          // list of issues found (empty if all pass)
-  "statements": [           // one entry per statement, in order
+  "pass": boolean,          // true ONLY if ALL statements passed \u2014 derive from statements array
+  "reasoning": string,      // brief overall summary of the evaluation
+  "issues": [...],          // one issue per failing statement (empty if all pass)
+  "statements": [           // one entry per statement, in order \u2014 evaluate these FIRST
     {
       "statement": string,  // the original statement text
       "pass": boolean,      // whether this statement is true
@@ -161,7 +184,7 @@ Only include issues for statements that fail. If all statements pass, issues sho
 Example for a failing check:
 {
   "pass": false,
-  "reasoning": "1 of 2 checks failed. The submit button is not visible.",
+  "reasoning": "The submit button is not visible on the page.",
   "issues": [
     { "priority": "major", "category": "missing-element", "description": "Submit button is not visible on the page", "suggestion": "Verify the submit button component is rendered and not hidden by CSS" }
   ],
@@ -421,7 +444,7 @@ var AnthropicDriver = class {
     this.client = new Anthropic({ apiKey });
     return this.client;
   }
-  async sendMessage(images, prompt) {
+  async sendMessage(images, prompt, _options) {
     const client = await this.getClient();
     const imageBlocks = images.map((img) => ({
       type: "image",
@@ -451,6 +474,13 @@ var AnthropicDriver = class {
       const message = await client.messages.create(requestParams);
       const textBlock = message.content.find((block) => block.type === "text");
       const text = textBlock?.text ?? "";
+      if (message.stop_reason === "max_tokens") {
+        throw new VisualAITruncationError(
+          `Response truncated: Anthropic stopped due to max_tokens limit (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
+          text,
+          this.maxTokens
+        );
+      }
       return {
         text,
         usage: {
@@ -459,6 +489,7 @@ var AnthropicDriver = class {
         }
       };
     } catch (err) {
+      if (err instanceof VisualAITruncationError) throw err;
       throw mapProviderError(err);
     }
   }
@@ -470,11 +501,11 @@ function needsCodeExecution(model) {
   const match = model.match(/^gemini-(\d+)/);
   return match !== null && match[1] !== void 0 && parseInt(match[1], 10) >= 3;
 }
-var GOOGLE_THINKING_BUDGET = {
-  low: 1024,
-  medium: 8192,
-  high: 24576,
-  xhigh: 24576
+var GOOGLE_THINKING_LEVEL = {
+  low: "minimal",
+  medium: "low",
+  high: "medium",
+  xhigh: "high"
 };
 var GoogleDriver = class {
   client;
@@ -514,7 +545,7 @@ var GoogleDriver = class {
     this.client = new GoogleGenAI({ apiKey });
     return this.client;
   }
-  async sendMessage(images, prompt) {
+  async sendMessage(images, prompt, _options) {
     const client = await this.getClient();
     try {
       const response = await client.models.generateContent({
@@ -525,20 +556,36 @@ var GoogleDriver = class {
           maxOutputTokens: this.maxTokens,
           ...this.reasoningEffort && {
             thinkingConfig: {
-              thinkingBudget: GOOGLE_THINKING_BUDGET[this.reasoningEffort]
+              thinkingLevel: GOOGLE_THINKING_LEVEL[this.reasoningEffort]
             }
           }
         }
       });
+      const finishReason = response.candidates?.[0]?.finishReason;
+      if (finishReason === "MAX_TOKENS") {
+        throw new VisualAITruncationError(
+          `Response truncated: Google returned finishReason "MAX_TOKENS". The model exhausted the output token budget (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
+          response.text ?? "",
+          this.maxTokens
+        );
+      }
+      if (finishReason && finishReason !== "STOP") {
+        throw new VisualAIProviderError(
+          `Response blocked: Google returned finishReason "${finishReason}".`
+        );
+      }
       const text = response.text ?? "";
+      const thoughtsTokenCount = response.usageMetadata?.thoughtsTokenCount;
       return {
         text,
         usage: response.usageMetadata ? {
           inputTokens: response.usageMetadata.promptTokenCount ?? 0,
-          outputTokens: response.usageMetadata.candidatesTokenCount ?? 0
+          outputTokens: response.usageMetadata.candidatesTokenCount ?? 0,
+          ...thoughtsTokenCount !== void 0 && { reasoningTokens: thoughtsTokenCount }
         } : void 0
       };
     } catch (err) {
+      if (err instanceof VisualAITruncationError || err instanceof VisualAIProviderError) throw err;
       throw mapProviderError(err);
     }
   }
@@ -610,17 +657,25 @@ var OpenAIDriver = class {
     this.client = new OpenAI({ apiKey });
     return this.client;
   }
-  async sendMessage(images, prompt) {
+  async sendMessage(images, prompt, options) {
     const client = await this.getClient();
     const imageBlocks = images.map((img) => ({
       type: "input_image",
       image_url: `data:${img.mimeType};base64,${img.base64}`
     }));
     try {
+      const format = options?.responseSchema ? {
+        type: "json_schema",
+        json_schema: {
+          name: "visual_ai_response",
+          strict: true,
+          schema: options.responseSchema
+        }
+      } : { type: "json_object" };
       const requestParams = {
         model: this.model,
         max_output_tokens: this.maxTokens,
-        text: { format: { type: "json_object" } },
+        text: { format },
         input: [
           {
             role: "user",
@@ -632,15 +687,26 @@ var OpenAIDriver = class {
         requestParams.reasoning = { effort: this.reasoningEffort };
       }
       const response = await client.responses.create(requestParams);
+      if (response.status && response.status !== "completed") {
+        const detail = response.incomplete_details?.reason ? ` (${response.incomplete_details.reason})` : "";
+        throw new VisualAITruncationError(
+          `Response truncated: OpenAI returned status "${response.status}"${detail}. The model exhausted the output token budget (${this.maxTokens} tokens). This commonly happens with higher reasoning effort levels. Increase maxTokens in your config (e.g., maxTokens: 16384) or lower reasoningEffort.`,
+          response.output_text ?? "",
+          this.maxTokens
+        );
+      }
       const text = response.output_text ?? "";
+      const reasoningTokens = response.usage?.output_tokens_details?.reasoning_tokens;
       return {
         text,
         usage: response.usage ? {
           inputTokens: response.usage.input_tokens,
-          outputTokens: response.usage.output_tokens
+          outputTokens: response.usage.output_tokens,
+          ...reasoningTokens !== void 0 && { reasoningTokens }
         } : void 0
       };
     } catch (err) {
+      if (err instanceof VisualAITruncationError) throw err;
       throw mapProviderError(err);
     }
   }
@@ -687,16 +753,40 @@ function parseBooleanEnv(envName, value) {
     `Invalid ${envName} value: "${value}". Use "true", "1", "false", or "0".`
   );
 }
+var debugDeprecationWarned = false;
 function resolveConfig(config) {
   const provider = resolveProvider(config);
   const model = config.model ?? process.env.VISUAL_AI_MODEL ?? DEFAULT_MODELS[provider];
+  const debug = config.debug ?? parseBooleanEnv("VISUAL_AI_DEBUG", process.env.VISUAL_AI_DEBUG) ?? false;
+  const debugPrompt = config.debugPrompt ?? parseBooleanEnv("VISUAL_AI_DEBUG_PROMPT", process.env.VISUAL_AI_DEBUG_PROMPT) ?? false;
+  const debugResponse = config.debugResponse ?? parseBooleanEnv("VISUAL_AI_DEBUG_RESPONSE", process.env.VISUAL_AI_DEBUG_RESPONSE) ?? false;
+  if (debug && !debugPrompt && !debugResponse && !debugDeprecationWarned) {
+    debugDeprecationWarned = true;
+    process.stderr.write(
+      `[visual-ai-assertions] Warning: VISUAL_AI_DEBUG no longer enables prompt/response logging. Use VISUAL_AI_DEBUG_PROMPT=true and/or VISUAL_AI_DEBUG_RESPONSE=true instead.
+`
+    );
+  }
+  const userSetMaxTokens = config.maxTokens !== void 0;
+  let maxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
+  if (!userSetMaxTokens && provider === "openai" && (config.reasoningEffort === "high" || config.reasoningEffort === "xhigh")) {
+    maxTokens = OPENAI_REASONING_MAX_TOKENS;
+    if (debug) {
+      process.stderr.write(
+        `[visual-ai-assertions] Auto-increased maxTokens from ${DEFAULT_MAX_TOKENS} to ${OPENAI_REASONING_MAX_TOKENS} for OpenAI with reasoningEffort "${config.reasoningEffort}".
+`
+      );
+    }
+  }
   return {
     provider,
     apiKey: config.apiKey,
     model,
-    maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
+    maxTokens,
     reasoningEffort: config.reasoningEffort,
-    debug: config.debug ?? parseBooleanEnv("VISUAL_AI_DEBUG", process.env.VISUAL_AI_DEBUG) ?? false,
+    debug,
+    debugPrompt,
+    debugResponse,
     trackUsage: config.trackUsage ?? parseBooleanEnv("VISUAL_AI_TRACK_USAGE", process.env.VISUAL_AI_TRACK_USAGE) ?? false
   };
 }
@@ -744,6 +834,10 @@ var PRICING_TABLE = {
     inputPricePerToken: 2 / PER_MILLION,
     outputPricePerToken: 12 / PER_MILLION
   },
+  [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_1_FLASH_LITE_PREVIEW}`]: {
+    inputPricePerToken: 0.25 / PER_MILLION,
+    outputPricePerToken: 1.5 / PER_MILLION
+  },
   [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_FLASH_PREVIEW}`]: {
     inputPricePerToken: 0.5 / PER_MILLION,
     outputPricePerToken: 3 / PER_MILLION
@@ -757,8 +851,9 @@ function calculateCost(provider, model, inputTokens, outputTokens) {
 }
 // src/core/debug.ts
-function debugLog(config, label, data) {
-  if (config.debug) {
+function debugLog(config, label, data, kind = "error") {
+  const enabled = kind === "prompt" ? config.debugPrompt : kind === "response" ? config.debugResponse : config.debug;
+  if (enabled) {
     process.stderr.write(`[visual-ai-assertions] ${label}: ${data}
 `);
   }
@@ -766,8 +861,10 @@ function debugLog(config, label, data) {
 function usageLog(config, method, usage) {
   if (!config.trackUsage) return;
   const costStr = usage.estimatedCost !== void 0 ? `$${usage.estimatedCost.toFixed(6)}` : "unknown";
+  const reasoningStr = config.reasoningEffort ? `reasoning: ${config.reasoningEffort}` : `reasoning: ${PROVIDER_DEFAULT_REASONING[config.provider]} (provider default)`;
+  const reasoningTokenStr = usage.reasoningTokens !== void 0 ? ` (${usage.reasoningTokens} reasoning)` : "";
   process.stderr.write(
-    `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}]
+    `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output${reasoningTokenStr} tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
 `
   );
 }
@@ -777,15 +874,42 @@ function processUsage(method, rawUsage, durationSeconds, config) {
   const usage = {
     inputTokens,
     outputTokens,
+    ...rawUsage?.reasoningTokens !== void 0 && { reasoningTokens: rawUsage.reasoningTokens },
     estimatedCost: calculateCost(config.provider, config.model, inputTokens, outputTokens),
     durationSeconds
   };
   usageLog(config, method, usage);
   return usage;
 }
-async function timedSendMessage(driver, images, prompt) {
+var MAX_RAW_RESPONSE_PREVIEW = 500;
+function formatError(error) {
+  if (error instanceof VisualAITruncationError) {
+    const preview = error.partialResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.partialResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.partialResponse;
+    return `${error.name} (${error.code}): ${error.message}. Partial response: ${preview}`;
+  }
+  if (error instanceof VisualAIResponseParseError) {
+    const truncated = error.rawResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.rawResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.rawResponse;
+    return `${error.name} (${error.code}): ${error.message}. Raw (truncated): ${truncated}`;
+  }
+  if (error instanceof VisualAIError) {
+    return `${error.name} (${error.code}): ${error.message}`;
+  }
+  if (error instanceof Error) {
+    return `${error.name}: ${error.message}`;
+  }
+  return String(error);
+}
+async function withErrorDebug(config, method, fn) {
+  try {
+    return await fn();
+  } catch (error) {
+    debugLog(config, `${method} error`, formatError(error), "error");
+    throw error;
+  }
+}
+async function timedSendMessage(driver, images, prompt, options) {
   const start = performance.now();
-  const response = await driver.sendMessage(images, prompt);
+  const response = await driver.sendMessage(images, prompt, options);
   const durationSeconds = (performance.now() - start) / 1e3;
   return { ...response, durationSeconds };
 }
@@ -1025,6 +1149,8 @@ var StatementResultSchema = z.object({
 var UsageInfoSchema = z.object({
   inputTokens: z.number(),
   outputTokens: z.number(),
+  /** Reasoning/thinking tokens consumed by the model (informational, typically included within outputTokens). */
+  reasoningTokens: z.number().optional(),
   estimatedCost: z.number().optional(),
   durationSeconds: z.number().nonnegative().optional()
 });
@@ -1078,8 +1204,24 @@ function parseResponse(raw, schema) {
   }
   return result.data;
 }
+function reconcileCheckResult(result) {
+  if (result.statements.length === 0) {
+    return result;
+  }
+  const passCount = result.statements.filter((s) => s.pass).length;
+  const total = result.statements.length;
+  const computedPass = passCount === total;
+  const countPrefix = `${passCount} of ${total} checks passed`;
+  const reasoning = `${countPrefix}. ${result.reasoning}`;
+  return {
+    ...result,
+    pass: computedPass,
+    reasoning
+  };
+}
 function parseCheckResponse(raw) {
-  return parseResponse(raw, CheckResponseSchema);
+  const result = parseResponse(raw, CheckResponseSchema);
+  return reconcileCheckResult(result);
 }
 function parseAskResponse(raw) {
   return parseResponse(raw, AskResponseSchema);
@@ -1089,6 +1231,12 @@ function parseCompareResponse(raw) {
 }
 // src/core/client.ts
+import { zodToJsonSchema } from "zod-to-json-schema";
+function toSchemaOptions(schema) {
+  return {
+    responseSchema: zodToJsonSchema(schema, { target: "openAi" })
+  };
+}
 var PROVIDER_REGISTRY = {
   anthropic: (config) => new AnthropicDriver(config),
   openai: (config) => new OpenAIDriver(config),
@@ -1097,6 +1245,9 @@ var PROVIDER_REGISTRY = {
 function createDriver(provider, config) {
   return PROVIDER_REGISTRY[provider](config);
 }
+var checkSchemaOptions = toSchemaOptions(CheckResponseSchema);
+var askSchemaOptions = toSchemaOptions(AskResponseSchema);
+var compareSchemaOptions = toSchemaOptions(CompareResponseSchema);
 function visualAI(config = {}) {
   const resolvedConfig = resolveConfig(config);
   const driverConfig = {
@@ -1111,16 +1262,18 @@ function visualAI(config = {}) {
     if (elements.length === 0) {
       throw new VisualAIConfigError(`At least one element is required for ${methodName}()`);
     }
-    const img = await normalizeImage(image);
-    const prompt = buildElementsVisibilityPrompt(elements, visible, options);
-    debugLog(resolvedConfig, `${methodName} prompt`, prompt);
-    const response = await timedSendMessage(driver, [img], prompt);
-    debugLog(resolvedConfig, `${methodName} response`, response.text);
-    const result = parseCheckResponse(response.text);
-    return {
-      ...result,
-      usage: processUsage(methodName, response.usage, response.durationSeconds, resolvedConfig)
-    };
+    return withErrorDebug(resolvedConfig, methodName, async () => {
+      const img = await normalizeImage(image);
+      const prompt = buildElementsVisibilityPrompt(elements, visible, options);
+      debugLog(resolvedConfig, `${methodName} prompt`, prompt, "prompt");
+      const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
+      debugLog(resolvedConfig, `${methodName} response`, response.text, "response");
+      const result = parseCheckResponse(response.text);
+      return {
+        ...result,
+        usage: processUsage(methodName, response.usage, response.durationSeconds, resolvedConfig)
+      };
+    });
   }
   return {
     async check(image, statements, options) {
@@ -1128,61 +1281,64 @@ function visualAI(config = {}) {
       if (stmts.length === 0) {
         throw new VisualAIConfigError("At least one statement is required for check()");
       }
-      const img = await normalizeImage(image);
-      const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
-      debugLog(resolvedConfig, "check prompt", prompt);
-      const response = await timedSendMessage(driver, [img], prompt);
-      debugLog(resolvedConfig, "check response", response.text);
-      const result = parseCheckResponse(response.text);
-      return {
-        ...result,
-        usage: processUsage("check", response.usage, response.durationSeconds, resolvedConfig)
-      };
+      return withErrorDebug(resolvedConfig, "check", async () => {
+        const img = await normalizeImage(image);
+        const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
+        debugLog(resolvedConfig, "check prompt", prompt, "prompt");
+        const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
+        debugLog(resolvedConfig, "check response", response.text, "response");
+        const result = parseCheckResponse(response.text);
+        return {
+          ...result,
+          usage: processUsage("check", response.usage, response.durationSeconds, resolvedConfig)
+        };
+      });
     },
     async ask(image, userPrompt, options) {
-      const img = await normalizeImage(image);
-      const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
-      debugLog(resolvedConfig, "ask prompt", prompt);
-      const response = await timedSendMessage(driver, [img], prompt);
-      debugLog(resolvedConfig, "ask response", response.text);
-      const result = parseAskResponse(response.text);
-      return {
-        ...result,
-        usage: processUsage("ask", response.usage, response.durationSeconds, resolvedConfig)
-      };
+      return withErrorDebug(resolvedConfig, "ask", async () => {
+        const img = await normalizeImage(image);
+        const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
+        debugLog(resolvedConfig, "ask prompt", prompt, "prompt");
+        const response = await timedSendMessage(driver, [img], prompt, askSchemaOptions);
+        debugLog(resolvedConfig, "ask response", response.text, "response");
+        const result = parseAskResponse(response.text);
+        return {
+          ...result,
+          usage: processUsage("ask", response.usage, response.durationSeconds, resolvedConfig)
+        };
+      });
     },
     async compare(imageA, imageB, options) {
-      const [imgA, imgB] = await Promise.all([normalizeImage(imageA), normalizeImage(imageB)]);
-      const prompt = buildComparePrompt({
-        userPrompt: options?.prompt,
-        instructions: options?.instructions
-      });
-      debugLog(resolvedConfig, "compare prompt", prompt);
-      const response = await timedSendMessage(driver, [imgA, imgB], prompt);
-      debugLog(resolvedConfig, "compare response", response.text);
-      const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
-      const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
-      let diffImage;
-      if (effectiveDiffImage) {
-        try {
-          diffImage = await generateAiDiff(imgA, imgB, resolvedConfig.model, driver);
-        } catch (err) {
-          const msg = err instanceof Error ? err.message : String(err);
-          debugLog(resolvedConfig, "ai diff error", msg);
-          if (!resolvedConfig.debug) {
+      return withErrorDebug(resolvedConfig, "compare", async () => {
+        const [imgA, imgB] = await Promise.all([normalizeImage(imageA), normalizeImage(imageB)]);
+        const prompt = buildComparePrompt({
+          userPrompt: options?.prompt,
+          instructions: options?.instructions
+        });
+        debugLog(resolvedConfig, "compare prompt", prompt, "prompt");
+        const response = await timedSendMessage(driver, [imgA, imgB], prompt, compareSchemaOptions);
+        debugLog(resolvedConfig, "compare response", response.text, "response");
+        const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
+        const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
+        let diffImage;
+        if (effectiveDiffImage) {
+          try {
+            diffImage = await generateAiDiff(imgA, imgB, resolvedConfig.model, driver);
+          } catch (err) {
+            const msg = err instanceof Error ? err.message : String(err);
             process.stderr.write(
               `[visual-ai-assertions] warning: diff generation failed: ${msg}
 `
             );
           }
         }
-      }
-      const result = parseCompareResponse(response.text);
-      return {
-        ...result,
-        ...diffImage ? { diffImage } : {},
-        usage: processUsage("compare", response.usage, response.durationSeconds, resolvedConfig)
-      };
+        const result = parseCompareResponse(response.text);
+        return {
+          ...result,
+          ...diffImage ? { diffImage } : {},
+          usage: processUsage("compare", response.usage, response.durationSeconds, resolvedConfig)
+        };
+      });
     },
     elementsVisible(image, elements, options) {
       return checkElementsVisibility(image, elements, true, options);
@@ -1191,57 +1347,65 @@ function visualAI(config = {}) {
       return checkElementsVisibility(image, elements, false, options);
     },
     async accessibility(image, options) {
-      const img = await normalizeImage(image);
-      const prompt = buildAccessibilityPrompt(options);
-      debugLog(resolvedConfig, "accessibility prompt", prompt);
-      const response = await timedSendMessage(driver, [img], prompt);
-      debugLog(resolvedConfig, "accessibility response", response.text);
-      const result = parseCheckResponse(response.text);
-      return {
-        ...result,
-        usage: processUsage(
-          "accessibility",
-          response.usage,
-          response.durationSeconds,
-          resolvedConfig
-        )
-      };
+      return withErrorDebug(resolvedConfig, "accessibility", async () => {
+        const img = await normalizeImage(image);
+        const prompt = buildAccessibilityPrompt(options);
+        debugLog(resolvedConfig, "accessibility prompt", prompt, "prompt");
+        const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
+        debugLog(resolvedConfig, "accessibility response", response.text, "response");
+        const result = parseCheckResponse(response.text);
+        return {
+          ...result,
+          usage: processUsage(
+            "accessibility",
+            response.usage,
+            response.durationSeconds,
+            resolvedConfig
+          )
+        };
+      });
     },
     async layout(image, options) {
-      const img = await normalizeImage(image);
-      const prompt = buildLayoutPrompt(options);
-      debugLog(resolvedConfig, "layout prompt", prompt);
-      const response = await timedSendMessage(driver, [img], prompt);
-      debugLog(resolvedConfig, "layout response", response.text);
-      const result = parseCheckResponse(response.text);
-      return {
-        ...result,
-        usage: processUsage("layout", response.usage, response.durationSeconds, resolvedConfig)
-      };
+      return withErrorDebug(resolvedConfig, "layout", async () => {
+        const img = await normalizeImage(image);
+        const prompt = buildLayoutPrompt(options);
+        debugLog(resolvedConfig, "layout prompt", prompt, "prompt");
+        const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
+        debugLog(resolvedConfig, "layout response", response.text, "response");
+        const result = parseCheckResponse(response.text);
+        return {
+          ...result,
+          usage: processUsage("layout", response.usage, response.durationSeconds, resolvedConfig)
+        };
+      });
     },
     async pageLoad(image, options) {
-      const img = await normalizeImage(image);
-      const prompt = buildPageLoadPrompt(options);
-      debugLog(resolvedConfig, "pageLoad prompt", prompt);
-      const response = await timedSendMessage(driver, [img], prompt);
-      debugLog(resolvedConfig, "pageLoad response", response.text);
-      const result = parseCheckResponse(response.text);
-      return {
-        ...result,
-        usage: processUsage("pageLoad", response.usage, response.durationSeconds, resolvedConfig)
-      };
+      return withErrorDebug(resolvedConfig, "pageLoad", async () => {
+        const img = await normalizeImage(image);
+        const prompt = buildPageLoadPrompt(options);
+        debugLog(resolvedConfig, "pageLoad prompt", prompt, "prompt");
+        const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
+        debugLog(resolvedConfig, "pageLoad response", response.text, "response");
+        const result = parseCheckResponse(response.text);
+        return {
+          ...result,
+          usage: processUsage("pageLoad", response.usage, response.durationSeconds, resolvedConfig)
+        };
+      });
     },
     async content(image, options) {
-      const img = await normalizeImage(image);
-      const prompt = buildContentPrompt(options);
-      debugLog(resolvedConfig, "content prompt", prompt);
-      const response = await timedSendMessage(driver, [img], prompt);
-      debugLog(resolvedConfig, "content response", response.text);
-      const result = parseCheckResponse(response.text);
-      return {
-        ...result,
-        usage: processUsage("content", response.usage, response.durationSeconds, resolvedConfig)
-      };
+      return withErrorDebug(resolvedConfig, "content", async () => {
+        const img = await normalizeImage(image);
+        const prompt = buildContentPrompt(options);
+        debugLog(resolvedConfig, "content prompt", prompt, "prompt");
+        const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
+        debugLog(resolvedConfig, "content response", response.text, "response");
+        const result = parseCheckResponse(response.text);
+        return {
+          ...result,
+          usage: processUsage("content", response.usage, response.durationSeconds, resolvedConfig)
+        };
+      });
     }
   };
 }
@@ -1325,6 +1489,7 @@ export {
   VisualAIProviderError,
   VisualAIRateLimitError,
   VisualAIResponseParseError,
+  VisualAITruncationError,
   assertVisualCompareResult,
   assertVisualResult,
   formatCheckResult,