npm - visual-ai-assertions - Versions diffs - 0.6.0 → 0.7.1 - Mend

visual-ai-assertions 0.6.0 → 0.7.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/index.js CHANGED Viewed

@@ -1,4 +1,10 @@
 // src/constants.ts
+var ReasoningEffort = {
+  LOW: "low",
+  MEDIUM: "medium",
+  HIGH: "high",
+  XHIGH: "xhigh"
+};
 var Provider = {
   ANTHROPIC: "anthropic",
   OPENAI: "openai",
@@ -20,6 +26,7 @@ var Model = {
   },
   Google: {
     GEMINI_3_1_PRO_PREVIEW: "gemini-3.1-pro-preview",
+    GEMINI_3_1_FLASH_LITE_PREVIEW: "gemini-3.1-flash-lite-preview",
     GEMINI_3_FLASH_PREVIEW: "gemini-3-flash-preview"
   }
 };
@@ -29,6 +36,7 @@ var DEFAULT_MODELS = {
   [Provider.GOOGLE]: Model.Google.GEMINI_3_FLASH_PREVIEW
 };
 var DEFAULT_MAX_TOKENS = 4096;
+var OPENAI_REASONING_MAX_TOKENS = 16384;
 var MODEL_TO_PROVIDER = new Map([
   ...Object.values(Model.Anthropic).map((m) => [m, Provider.ANTHROPIC]),
   ...Object.values(Model.OpenAI).map((m) => [m, Provider.OPENAI]),
@@ -113,6 +121,16 @@ var VisualAIResponseParseError = class extends VisualAIError {
     this.rawResponse = rawResponse;
   }
 };
+var VisualAITruncationError = class extends VisualAIError {
+  partialResponse;
+  maxTokens;
+  constructor(message, partialResponse, maxTokens) {
+    super(message, "RESPONSE_TRUNCATED");
+    this.name = "VisualAITruncationError";
+    this.partialResponse = partialResponse;
+    this.maxTokens = maxTokens;
+  }
+};
 var VisualAIConfigError = class extends VisualAIError {
   constructor(message) {
     super(message, "CONFIG_INVALID");
@@ -128,7 +146,7 @@ var VisualAIAssertionError = class extends VisualAIError {
   }
 };
 function isVisualAIKnownError(error) {
-  return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
+  return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAITruncationError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
 }
 // src/core/prompt.ts
@@ -142,12 +160,18 @@ Each issue must have:
 - "description": what the issue is
 - "suggestion": how to fix or improve it
 `;
-var CHECK_OUTPUT_SCHEMA = `Respond with a JSON object matching this exact structure:
+var CHECK_OUTPUT_SCHEMA = `IMPORTANT: Follow this evaluation order:
+1. First, evaluate EACH statement independently and populate the "statements" array
+2. Then, set "pass" to true ONLY if every statement passed (logical AND of all statement results)
+3. Write "reasoning" as a brief overall summary of the evaluation
+4. Include "issues" only for statements that failed
+Respond with a JSON object matching this exact structure:
 {
-  "pass": boolean,          // true ONLY if ALL statements are true
-  "reasoning": string,      // brief overall summary (e.g. "3 of 4 checks passed...")
-  "issues": [...],          // list of issues found (empty if all pass)
-  "statements": [           // one entry per statement, in order
+  "pass": boolean,          // true ONLY if ALL statements passed \u2014 derive from statements array
+  "reasoning": string,      // brief overall summary of the evaluation
+  "issues": [...],          // one issue per failing statement (empty if all pass)
+  "statements": [           // one entry per statement, in order \u2014 evaluate these FIRST
     {
       "statement": string,  // the original statement text
       "pass": boolean,      // whether this statement is true
@@ -166,7 +190,7 @@ Only include issues for statements that fail. If all statements pass, issues sho
 Example for a failing check:
 {
   "pass": false,
-  "reasoning": "1 of 2 checks failed. The submit button is not visible.",
+  "reasoning": "The submit button is not visible on the page.",
   "issues": [
     { "priority": "major", "category": "missing-element", "description": "Submit button is not visible on the page", "suggestion": "Verify the submit button component is rendered and not hidden by CSS" }
   ],
@@ -426,7 +450,7 @@ var AnthropicDriver = class {
     this.client = new Anthropic({ apiKey });
     return this.client;
   }
-  async sendMessage(images, prompt) {
+  async sendMessage(images, prompt, _options) {
     const client = await this.getClient();
     const imageBlocks = images.map((img) => ({
       type: "image",
@@ -456,6 +480,13 @@ var AnthropicDriver = class {
       const message = await client.messages.create(requestParams);
       const textBlock = message.content.find((block) => block.type === "text");
       const text = textBlock?.text ?? "";
+      if (message.stop_reason === "max_tokens") {
+        throw new VisualAITruncationError(
+          `Response truncated: Anthropic stopped due to max_tokens limit (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
+          text,
+          this.maxTokens
+        );
+      }
       return {
         text,
         usage: {
@@ -464,6 +495,7 @@ var AnthropicDriver = class {
         }
       };
     } catch (err) {
+      if (err instanceof VisualAITruncationError) throw err;
       throw mapProviderError(err);
     }
   }
@@ -475,11 +507,11 @@ function needsCodeExecution(model) {
   const match = model.match(/^gemini-(\d+)/);
   return match !== null && match[1] !== void 0 && parseInt(match[1], 10) >= 3;
 }
-var GOOGLE_THINKING_BUDGET = {
-  low: 1024,
-  medium: 8192,
-  high: 24576,
-  xhigh: 24576
+var GOOGLE_THINKING_LEVEL = {
+  low: "minimal",
+  medium: "low",
+  high: "medium",
+  xhigh: "high"
 };
 var GoogleDriver = class {
   client;
@@ -519,7 +551,7 @@ var GoogleDriver = class {
     this.client = new GoogleGenAI({ apiKey });
     return this.client;
   }
-  async sendMessage(images, prompt) {
+  async sendMessage(images, prompt, _options) {
     const client = await this.getClient();
     try {
       const response = await client.models.generateContent({
@@ -530,20 +562,36 @@ var GoogleDriver = class {
           maxOutputTokens: this.maxTokens,
           ...this.reasoningEffort && {
             thinkingConfig: {
-              thinkingBudget: GOOGLE_THINKING_BUDGET[this.reasoningEffort]
+              thinkingLevel: GOOGLE_THINKING_LEVEL[this.reasoningEffort]
             }
           }
         }
       });
+      const finishReason = response.candidates?.[0]?.finishReason;
+      if (finishReason === "MAX_TOKENS") {
+        throw new VisualAITruncationError(
+          `Response truncated: Google returned finishReason "MAX_TOKENS". The model exhausted the output token budget (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
+          response.text ?? "",
+          this.maxTokens
+        );
+      }
+      if (finishReason && finishReason !== "STOP") {
+        throw new VisualAIProviderError(
+          `Response blocked: Google returned finishReason "${finishReason}".`
+        );
+      }
       const text = response.text ?? "";
+      const thoughtsTokenCount = response.usageMetadata?.thoughtsTokenCount;
       return {
         text,
         usage: response.usageMetadata ? {
           inputTokens: response.usageMetadata.promptTokenCount ?? 0,
-          outputTokens: response.usageMetadata.candidatesTokenCount ?? 0
+          outputTokens: response.usageMetadata.candidatesTokenCount ?? 0,
+          ...thoughtsTokenCount !== void 0 && { reasoningTokens: thoughtsTokenCount }
         } : void 0
       };
     } catch (err) {
+      if (err instanceof VisualAITruncationError || err instanceof VisualAIProviderError) throw err;
       throw mapProviderError(err);
     }
   }
@@ -615,17 +663,25 @@ var OpenAIDriver = class {
     this.client = new OpenAI({ apiKey });
     return this.client;
   }
-  async sendMessage(images, prompt) {
+  async sendMessage(images, prompt, options) {
     const client = await this.getClient();
     const imageBlocks = images.map((img) => ({
       type: "input_image",
       image_url: `data:${img.mimeType};base64,${img.base64}`
     }));
     try {
+      const format = options?.responseSchema ? {
+        type: "json_schema",
+        json_schema: {
+          name: "visual_ai_response",
+          strict: true,
+          schema: options.responseSchema
+        }
+      } : { type: "json_object", name: "visual_ai_response" };
       const requestParams = {
         model: this.model,
         max_output_tokens: this.maxTokens,
-        text: { format: { type: "json_object" } },
+        text: { format },
         input: [
           {
             role: "user",
@@ -637,15 +693,26 @@ var OpenAIDriver = class {
         requestParams.reasoning = { effort: this.reasoningEffort };
       }
       const response = await client.responses.create(requestParams);
+      if (response.status && response.status !== "completed") {
+        const detail = response.incomplete_details?.reason ? ` (${response.incomplete_details.reason})` : "";
+        throw new VisualAITruncationError(
+          `Response truncated: OpenAI returned status "${response.status}"${detail}. The model exhausted the output token budget (${this.maxTokens} tokens). This commonly happens with higher reasoning effort levels. Increase maxTokens in your config (e.g., maxTokens: 16384) or lower reasoningEffort.`,
+          response.output_text ?? "",
+          this.maxTokens
+        );
+      }
       const text = response.output_text ?? "";
+      const reasoningTokens = response.usage?.output_tokens_details?.reasoning_tokens;
       return {
         text,
         usage: response.usage ? {
           inputTokens: response.usage.input_tokens,
-          outputTokens: response.usage.output_tokens
+          outputTokens: response.usage.output_tokens,
+          ...reasoningTokens !== void 0 && { reasoningTokens }
         } : void 0
       };
     } catch (err) {
+      if (err instanceof VisualAITruncationError) throw err;
       throw mapProviderError(err);
     }
   }
@@ -692,15 +759,6 @@ function parseBooleanEnv(envName, value) {
     `Invalid ${envName} value: "${value}". Use "true", "1", "false", or "0".`
   );
 }
-var VALID_REASONING_EFFORTS = ["low", "medium", "high", "xhigh"];
-function parseReasoningEffortEnv(envName, value) {
-  if (value === void 0 || value === "") return void 0;
-  const lower = value.toLowerCase();
-  if (VALID_REASONING_EFFORTS.includes(lower)) return lower;
-  throw new VisualAIConfigError(
-    `Invalid ${envName} value: "${value}". Use "low", "medium", "high", or "xhigh".`
-  );
-}
 var debugDeprecationWarned = false;
 function resolveConfig(config) {
   const provider = resolveProvider(config);
@@ -715,12 +773,23 @@ function resolveConfig(config) {
 `
     );
   }
+  const userSetMaxTokens = config.maxTokens !== void 0;
+  let maxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
+  if (!userSetMaxTokens && provider === "openai" && (config.reasoningEffort === "high" || config.reasoningEffort === "xhigh")) {
+    maxTokens = OPENAI_REASONING_MAX_TOKENS;
+    if (debug) {
+      process.stderr.write(
+        `[visual-ai-assertions] Auto-increased maxTokens from ${DEFAULT_MAX_TOKENS} to ${OPENAI_REASONING_MAX_TOKENS} for OpenAI with reasoningEffort "${config.reasoningEffort}".
+`
+      );
+    }
+  }
   return {
     provider,
     apiKey: config.apiKey,
     model,
-    maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
-    reasoningEffort: config.reasoningEffort ?? parseReasoningEffortEnv("VISUAL_AI_REASONING_EFFORT", process.env.VISUAL_AI_REASONING_EFFORT),
+    maxTokens,
+    reasoningEffort: config.reasoningEffort,
     debug,
     debugPrompt,
     debugResponse,
@@ -771,6 +840,10 @@ var PRICING_TABLE = {
     inputPricePerToken: 2 / PER_MILLION,
     outputPricePerToken: 12 / PER_MILLION
   },
+  [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_1_FLASH_LITE_PREVIEW}`]: {
+    inputPricePerToken: 0.25 / PER_MILLION,
+    outputPricePerToken: 1.5 / PER_MILLION
+  },
   [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_FLASH_PREVIEW}`]: {
     inputPricePerToken: 0.5 / PER_MILLION,
     outputPricePerToken: 3 / PER_MILLION
@@ -795,8 +868,9 @@ function usageLog(config, method, usage) {
   if (!config.trackUsage) return;
   const costStr = usage.estimatedCost !== void 0 ? `$${usage.estimatedCost.toFixed(6)}` : "unknown";
   const reasoningStr = config.reasoningEffort ? `reasoning: ${config.reasoningEffort}` : `reasoning: ${PROVIDER_DEFAULT_REASONING[config.provider]} (provider default)`;
+  const reasoningTokenStr = usage.reasoningTokens !== void 0 ? ` (${usage.reasoningTokens} reasoning)` : "";
   process.stderr.write(
-    `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
+    `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output${reasoningTokenStr} tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
 `
   );
 }
@@ -806,6 +880,7 @@ function processUsage(method, rawUsage, durationSeconds, config) {
   const usage = {
     inputTokens,
     outputTokens,
+    ...rawUsage?.reasoningTokens !== void 0 && { reasoningTokens: rawUsage.reasoningTokens },
     estimatedCost: calculateCost(config.provider, config.model, inputTokens, outputTokens),
     durationSeconds
   };
@@ -814,6 +889,10 @@ function processUsage(method, rawUsage, durationSeconds, config) {
 }
 var MAX_RAW_RESPONSE_PREVIEW = 500;
 function formatError(error) {
+  if (error instanceof VisualAITruncationError) {
+    const preview = error.partialResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.partialResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.partialResponse;
+    return `${error.name} (${error.code}): ${error.message}. Partial response: ${preview}`;
+  }
   if (error instanceof VisualAIResponseParseError) {
     const truncated = error.rawResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.rawResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.rawResponse;
     return `${error.name} (${error.code}): ${error.message}. Raw (truncated): ${truncated}`;
@@ -834,9 +913,9 @@ async function withErrorDebug(config, method, fn) {
     throw error;
   }
 }
-async function timedSendMessage(driver, images, prompt) {
+async function timedSendMessage(driver, images, prompt, options) {
   const start = performance.now();
-  const response = await driver.sendMessage(images, prompt);
+  const response = await driver.sendMessage(images, prompt, options);
   const durationSeconds = (performance.now() - start) / 1e3;
   return { ...response, durationSeconds };
 }
@@ -1076,6 +1155,8 @@ var StatementResultSchema = z.object({
 var UsageInfoSchema = z.object({
   inputTokens: z.number(),
   outputTokens: z.number(),
+  /** Reasoning/thinking tokens consumed by the model (informational, typically included within outputTokens). */
+  reasoningTokens: z.number().optional(),
   estimatedCost: z.number().optional(),
   durationSeconds: z.number().nonnegative().optional()
 });
@@ -1129,8 +1210,24 @@ function parseResponse(raw, schema) {
   }
   return result.data;
 }
+function reconcileCheckResult(result) {
+  if (result.statements.length === 0) {
+    return result;
+  }
+  const passCount = result.statements.filter((s) => s.pass).length;
+  const total = result.statements.length;
+  const computedPass = passCount === total;
+  const countPrefix = `${passCount} of ${total} checks passed`;
+  const reasoning = `${countPrefix}. ${result.reasoning}`;
+  return {
+    ...result,
+    pass: computedPass,
+    reasoning
+  };
+}
 function parseCheckResponse(raw) {
-  return parseResponse(raw, CheckResponseSchema);
+  const result = parseResponse(raw, CheckResponseSchema);
+  return reconcileCheckResult(result);
 }
 function parseAskResponse(raw) {
   return parseResponse(raw, AskResponseSchema);
@@ -1140,6 +1237,12 @@ function parseCompareResponse(raw) {
 }
 // src/core/client.ts
+import { zodToJsonSchema } from "zod-to-json-schema";
+function toSchemaOptions(schema) {
+  return {
+    responseSchema: zodToJsonSchema(schema, { target: "openAi" })
+  };
+}
 var PROVIDER_REGISTRY = {
   anthropic: (config) => new AnthropicDriver(config),
   openai: (config) => new OpenAIDriver(config),
@@ -1148,6 +1251,9 @@ var PROVIDER_REGISTRY = {
 function createDriver(provider, config) {
   return PROVIDER_REGISTRY[provider](config);
 }
+var checkSchemaOptions = toSchemaOptions(CheckResponseSchema);
+var askSchemaOptions = toSchemaOptions(AskResponseSchema);
+var compareSchemaOptions = toSchemaOptions(CompareResponseSchema);
 function visualAI(config = {}) {
   const resolvedConfig = resolveConfig(config);
   const driverConfig = {
@@ -1166,7 +1272,7 @@ function visualAI(config = {}) {
       const img = await normalizeImage(image);
       const prompt = buildElementsVisibilityPrompt(elements, visible, options);
       debugLog(resolvedConfig, `${methodName} prompt`, prompt, "prompt");
-      const response = await timedSendMessage(driver, [img], prompt);
+      const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
       debugLog(resolvedConfig, `${methodName} response`, response.text, "response");
       const result = parseCheckResponse(response.text);
       return {
@@ -1185,7 +1291,7 @@ function visualAI(config = {}) {
         const img = await normalizeImage(image);
         const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
         debugLog(resolvedConfig, "check prompt", prompt, "prompt");
-        const response = await timedSendMessage(driver, [img], prompt);
+        const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
         debugLog(resolvedConfig, "check response", response.text, "response");
         const result = parseCheckResponse(response.text);
         return {
@@ -1199,7 +1305,7 @@ function visualAI(config = {}) {
         const img = await normalizeImage(image);
         const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
         debugLog(resolvedConfig, "ask prompt", prompt, "prompt");
-        const response = await timedSendMessage(driver, [img], prompt);
+        const response = await timedSendMessage(driver, [img], prompt, askSchemaOptions);
         debugLog(resolvedConfig, "ask response", response.text, "response");
         const result = parseAskResponse(response.text);
         return {
@@ -1216,7 +1322,7 @@ function visualAI(config = {}) {
           instructions: options?.instructions
         });
         debugLog(resolvedConfig, "compare prompt", prompt, "prompt");
-        const response = await timedSendMessage(driver, [imgA, imgB], prompt);
+        const response = await timedSendMessage(driver, [imgA, imgB], prompt, compareSchemaOptions);
         debugLog(resolvedConfig, "compare response", response.text, "response");
         const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
         const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
@@ -1251,7 +1357,7 @@ function visualAI(config = {}) {
         const img = await normalizeImage(image);
         const prompt = buildAccessibilityPrompt(options);
         debugLog(resolvedConfig, "accessibility prompt", prompt, "prompt");
-        const response = await timedSendMessage(driver, [img], prompt);
+        const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
         debugLog(resolvedConfig, "accessibility response", response.text, "response");
         const result = parseCheckResponse(response.text);
         return {
@@ -1270,7 +1376,7 @@ function visualAI(config = {}) {
         const img = await normalizeImage(image);
         const prompt = buildLayoutPrompt(options);
         debugLog(resolvedConfig, "layout prompt", prompt, "prompt");
-        const response = await timedSendMessage(driver, [img], prompt);
+        const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
         debugLog(resolvedConfig, "layout response", response.text, "response");
         const result = parseCheckResponse(response.text);
         return {
@@ -1284,7 +1390,7 @@ function visualAI(config = {}) {
         const img = await normalizeImage(image);
         const prompt = buildPageLoadPrompt(options);
         debugLog(resolvedConfig, "pageLoad prompt", prompt, "prompt");
-        const response = await timedSendMessage(driver, [img], prompt);
+        const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
         debugLog(resolvedConfig, "pageLoad response", response.text, "response");
         const result = parseCheckResponse(response.text);
         return {
@@ -1298,7 +1404,7 @@ function visualAI(config = {}) {
         const img = await normalizeImage(image);
         const prompt = buildContentPrompt(options);
         debugLog(resolvedConfig, "content prompt", prompt, "prompt");
-        const response = await timedSendMessage(driver, [img], prompt);
+        const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
         debugLog(resolvedConfig, "content response", response.text, "response");
         const result = parseCheckResponse(response.text);
         return {
@@ -1379,6 +1485,7 @@ export {
   Layout,
   Model,
   Provider,
+  ReasoningEffort,
   StatementResultSchema,
   UsageInfoSchema,
   VisualAIAssertionError,
@@ -1389,6 +1496,7 @@ export {
   VisualAIProviderError,
   VisualAIRateLimitError,
   VisualAIResponseParseError,
+  VisualAITruncationError,
   assertVisualCompareResult,
   assertVisualResult,
   formatCheckResult,