visual-ai-assertions 0.5.0 → 0.7.0

This diff shows the changes between two publicly released versions of the package, exactly as they appear in the public registry. It is provided for informational purposes only.
package/dist/index.cjs CHANGED
@@ -54,6 +54,7 @@ __export(index_exports, {
  VisualAIProviderError: () => VisualAIProviderError,
  VisualAIRateLimitError: () => VisualAIRateLimitError,
  VisualAIResponseParseError: () => VisualAIResponseParseError,
+ VisualAITruncationError: () => VisualAITruncationError,
  assertVisualCompareResult: () => assertVisualCompareResult,
  assertVisualResult: () => assertVisualResult,
  formatCheckResult: () => formatCheckResult,
@@ -85,6 +86,7 @@ var Model = {
  },
  Google: {
  GEMINI_3_1_PRO_PREVIEW: "gemini-3.1-pro-preview",
+ GEMINI_3_1_FLASH_LITE_PREVIEW: "gemini-3.1-flash-lite-preview",
  GEMINI_3_FLASH_PREVIEW: "gemini-3-flash-preview"
  }
  };
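
A minimal usage sketch for the new model (not part of the diff; assumes `visualAI` is the package's exported factory, which this partial export list does not show):

    const { visualAI } = require("visual-ai-assertions");

    // "gemini-3.1-flash-lite-preview" is the string behind the new
    // Model.Google.GEMINI_3_1_FLASH_LITE_PREVIEW constant added above.
    const ai = visualAI({ provider: "google", model: "gemini-3.1-flash-lite-preview" });
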
@@ -94,12 +96,18 @@ var DEFAULT_MODELS = {
  [Provider.GOOGLE]: Model.Google.GEMINI_3_FLASH_PREVIEW
  };
  var DEFAULT_MAX_TOKENS = 4096;
+ var OPENAI_REASONING_MAX_TOKENS = 16384;
  var MODEL_TO_PROVIDER = new Map([
  ...Object.values(Model.Anthropic).map((m) => [m, Provider.ANTHROPIC]),
  ...Object.values(Model.OpenAI).map((m) => [m, Provider.OPENAI]),
  ...Object.values(Model.Google).map((m) => [m, Provider.GOOGLE])
  ]);
  var VALID_PROVIDERS = Object.values(Provider);
+ var PROVIDER_DEFAULT_REASONING = {
+ openai: "medium",
+ anthropic: "off",
+ google: "off"
+ };
  var Content = {
  /** Detects Lorem ipsum, TODO, TBD, and similar placeholder text */
  PLACEHOLDER_TEXT: "placeholder-text",
@@ -173,6 +181,16 @@ var VisualAIResponseParseError = class extends VisualAIError {
  this.rawResponse = rawResponse;
  }
  };
+ var VisualAITruncationError = class extends VisualAIError {
+ partialResponse;
+ maxTokens;
+ constructor(message, partialResponse, maxTokens) {
+ super(message, "RESPONSE_TRUNCATED");
+ this.name = "VisualAITruncationError";
+ this.partialResponse = partialResponse;
+ this.maxTokens = maxTokens;
+ }
+ };
  var VisualAIConfigError = class extends VisualAIError {
  constructor(message) {
  super(message, "CONFIG_INVALID");
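
The new error carries the partial model output and the token limit that was hit. A handling sketch, assuming `visualAI` is the exported factory and `screenshot` is a caller-supplied image:

    const { visualAI, VisualAITruncationError } = require("visual-ai-assertions");

    async function runCheck(screenshot) {
      const ai = visualAI({ provider: "openai", reasoningEffort: "xhigh" });
      try {
        return await ai.check(screenshot, ["The submit button is visible"]);
      } catch (err) {
        if (err instanceof VisualAITruncationError) {
          // code is "RESPONSE_TRUNCATED"; partialResponse and maxTokens are
          // attached for diagnostics.
          console.error(`Truncated at ${err.maxTokens} tokens:`, err.partialResponse);
        }
        throw err;
      }
    }
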
@@ -188,7 +206,7 @@ var VisualAIAssertionError = class extends VisualAIError {
  }
  };
  function isVisualAIKnownError(error) {
- return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
+ return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAITruncationError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
  }

  // src/core/prompt.ts
@@ -202,12 +220,18 @@ Each issue must have:
  - "description": what the issue is
  - "suggestion": how to fix or improve it
  `;
- var CHECK_OUTPUT_SCHEMA = `Respond with a JSON object matching this exact structure:
+ var CHECK_OUTPUT_SCHEMA = `IMPORTANT: Follow this evaluation order:
+ 1. First, evaluate EACH statement independently and populate the "statements" array
+ 2. Then, set "pass" to true ONLY if every statement passed (logical AND of all statement results)
+ 3. Write "reasoning" as a brief overall summary of the evaluation
+ 4. Include "issues" only for statements that failed
+
+ Respond with a JSON object matching this exact structure:
  {
- "pass": boolean, // true ONLY if ALL statements are true
- "reasoning": string, // brief overall summary (e.g. "3 of 4 checks passed...")
- "issues": [...], // list of issues found (empty if all pass)
- "statements": [ // one entry per statement, in order
+ "pass": boolean, // true ONLY if ALL statements passed \u2014 derive from statements array
+ "reasoning": string, // brief overall summary of the evaluation
+ "issues": [...], // one issue per failing statement (empty if all pass)
+ "statements": [ // one entry per statement, in order \u2014 evaluate these FIRST
  {
  "statement": string, // the original statement text
  "pass": boolean, // whether this statement is true
@@ -226,7 +250,7 @@ Only include issues for statements that fail. If all statements pass, issues sho
  Example for a failing check:
  {
  "pass": false,
- "reasoning": "1 of 2 checks failed. The submit button is not visible.",
+ "reasoning": "The submit button is not visible on the page.",
  "issues": [
  { "priority": "major", "category": "missing-element", "description": "Submit button is not visible on the page", "suggestion": "Verify the submit button component is rendered and not hidden by CSS" }
  ],
@@ -486,7 +510,7 @@ var AnthropicDriver = class {
  this.client = new Anthropic({ apiKey });
  return this.client;
  }
- async sendMessage(images, prompt) {
+ async sendMessage(images, prompt, _options) {
  const client = await this.getClient();
  const imageBlocks = images.map((img) => ({
  type: "image",
@@ -516,6 +540,13 @@ var AnthropicDriver = class {
  const message = await client.messages.create(requestParams);
  const textBlock = message.content.find((block) => block.type === "text");
  const text = textBlock?.text ?? "";
+ if (message.stop_reason === "max_tokens") {
+ throw new VisualAITruncationError(
+ `Response truncated: Anthropic stopped due to max_tokens limit (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
+ text,
+ this.maxTokens
+ );
+ }
  return {
  text,
  usage: {
@@ -524,6 +555,7 @@ var AnthropicDriver = class {
  }
  };
  } catch (err) {
+ if (err instanceof VisualAITruncationError) throw err;
  throw mapProviderError(err);
  }
  }
@@ -535,11 +567,11 @@ function needsCodeExecution(model) {
  const match = model.match(/^gemini-(\d+)/);
  return match !== null && match[1] !== void 0 && parseInt(match[1], 10) >= 3;
  }
- var GOOGLE_THINKING_BUDGET = {
- low: 1024,
- medium: 8192,
- high: 24576,
- xhigh: 24576
+ var GOOGLE_THINKING_LEVEL = {
+ low: "minimal",
+ medium: "low",
+ high: "medium",
+ xhigh: "high"
  };
  var GoogleDriver = class {
  client;
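
The mapping above replaces numeric thinking budgets with named levels; the driver now sends thinkingConfig: { thinkingLevel: ... } rather than a thinkingBudget (see the generateContent hunk below). An illustrative sketch, assuming `visualAI` is the exported factory:

    const { visualAI } = require("visual-ai-assertions");

    // reasoningEffort "high" used to request thinkingBudget: 24576;
    // per the table above it now requests thinkingLevel: "medium".
    const ai = visualAI({ provider: "google", reasoningEffort: "high" });
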
@@ -579,7 +611,7 @@ var GoogleDriver = class {
  this.client = new GoogleGenAI({ apiKey });
  return this.client;
  }
- async sendMessage(images, prompt) {
+ async sendMessage(images, prompt, _options) {
  const client = await this.getClient();
  try {
  const response = await client.models.generateContent({
@@ -590,20 +622,36 @@ var GoogleDriver = class {
  maxOutputTokens: this.maxTokens,
  ...this.reasoningEffort && {
  thinkingConfig: {
- thinkingBudget: GOOGLE_THINKING_BUDGET[this.reasoningEffort]
+ thinkingLevel: GOOGLE_THINKING_LEVEL[this.reasoningEffort]
  }
  }
  }
  });
+ const finishReason = response.candidates?.[0]?.finishReason;
+ if (finishReason === "MAX_TOKENS") {
+ throw new VisualAITruncationError(
+ `Response truncated: Google returned finishReason "MAX_TOKENS". The model exhausted the output token budget (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
+ response.text ?? "",
+ this.maxTokens
+ );
+ }
+ if (finishReason && finishReason !== "STOP") {
+ throw new VisualAIProviderError(
+ `Response blocked: Google returned finishReason "${finishReason}".`
+ );
+ }
  const text = response.text ?? "";
+ const thoughtsTokenCount = response.usageMetadata?.thoughtsTokenCount;
  return {
  text,
  usage: response.usageMetadata ? {
  inputTokens: response.usageMetadata.promptTokenCount ?? 0,
- outputTokens: response.usageMetadata.candidatesTokenCount ?? 0
+ outputTokens: response.usageMetadata.candidatesTokenCount ?? 0,
+ ...thoughtsTokenCount !== void 0 && { reasoningTokens: thoughtsTokenCount }
  } : void 0
  };
  } catch (err) {
+ if (err instanceof VisualAITruncationError || err instanceof VisualAIProviderError) throw err;
  throw mapProviderError(err);
  }
  }
@@ -675,17 +723,25 @@ var OpenAIDriver = class {
  this.client = new OpenAI({ apiKey });
  return this.client;
  }
- async sendMessage(images, prompt) {
+ async sendMessage(images, prompt, options) {
  const client = await this.getClient();
  const imageBlocks = images.map((img) => ({
  type: "input_image",
  image_url: `data:${img.mimeType};base64,${img.base64}`
  }));
  try {
+ const format = options?.responseSchema ? {
+ type: "json_schema",
+ json_schema: {
+ name: "visual_ai_response",
+ strict: true,
+ schema: options.responseSchema
+ }
+ } : { type: "json_object" };
  const requestParams = {
  model: this.model,
  max_output_tokens: this.maxTokens,
- text: { format: { type: "json_object" } },
+ text: { format },
  input: [
  {
  role: "user",
@@ -697,15 +753,26 @@ var OpenAIDriver = class {
  requestParams.reasoning = { effort: this.reasoningEffort };
  }
  const response = await client.responses.create(requestParams);
+ if (response.status && response.status !== "completed") {
+ const detail = response.incomplete_details?.reason ? ` (${response.incomplete_details.reason})` : "";
+ throw new VisualAITruncationError(
+ `Response truncated: OpenAI returned status "${response.status}"${detail}. The model exhausted the output token budget (${this.maxTokens} tokens). This commonly happens with higher reasoning effort levels. Increase maxTokens in your config (e.g., maxTokens: 16384) or lower reasoningEffort.`,
+ response.output_text ?? "",
+ this.maxTokens
+ );
+ }
  const text = response.output_text ?? "";
+ const reasoningTokens = response.usage?.output_tokens_details?.reasoning_tokens;
  return {
  text,
  usage: response.usage ? {
  inputTokens: response.usage.input_tokens,
- outputTokens: response.usage.output_tokens
+ outputTokens: response.usage.output_tokens,
+ ...reasoningTokens !== void 0 && { reasoningTokens }
  } : void 0
  };
  } catch (err) {
+ if (err instanceof VisualAITruncationError) throw err;
  throw mapProviderError(err);
  }
  }
@@ -752,16 +819,40 @@ function parseBooleanEnv(envName, value) {
  `Invalid ${envName} value: "${value}". Use "true", "1", "false", or "0".`
  );
  }
+ var debugDeprecationWarned = false;
  function resolveConfig(config) {
  const provider = resolveProvider(config);
  const model = config.model ?? process.env.VISUAL_AI_MODEL ?? DEFAULT_MODELS[provider];
+ const debug = config.debug ?? parseBooleanEnv("VISUAL_AI_DEBUG", process.env.VISUAL_AI_DEBUG) ?? false;
+ const debugPrompt = config.debugPrompt ?? parseBooleanEnv("VISUAL_AI_DEBUG_PROMPT", process.env.VISUAL_AI_DEBUG_PROMPT) ?? false;
+ const debugResponse = config.debugResponse ?? parseBooleanEnv("VISUAL_AI_DEBUG_RESPONSE", process.env.VISUAL_AI_DEBUG_RESPONSE) ?? false;
+ if (debug && !debugPrompt && !debugResponse && !debugDeprecationWarned) {
+ debugDeprecationWarned = true;
+ process.stderr.write(
+ `[visual-ai-assertions] Warning: VISUAL_AI_DEBUG no longer enables prompt/response logging. Use VISUAL_AI_DEBUG_PROMPT=true and/or VISUAL_AI_DEBUG_RESPONSE=true instead.
+ `
+ );
+ }
+ const userSetMaxTokens = config.maxTokens !== void 0;
+ let maxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
+ if (!userSetMaxTokens && provider === "openai" && (config.reasoningEffort === "high" || config.reasoningEffort === "xhigh")) {
+ maxTokens = OPENAI_REASONING_MAX_TOKENS;
+ if (debug) {
+ process.stderr.write(
+ `[visual-ai-assertions] Auto-increased maxTokens from ${DEFAULT_MAX_TOKENS} to ${OPENAI_REASONING_MAX_TOKENS} for OpenAI with reasoningEffort "${config.reasoningEffort}".
+ `
+ );
+ }
+ }
  return {
  provider,
  apiKey: config.apiKey,
  model,
- maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
+ maxTokens,
  reasoningEffort: config.reasoningEffort,
- debug: config.debug ?? parseBooleanEnv("VISUAL_AI_DEBUG", process.env.VISUAL_AI_DEBUG) ?? false,
+ debug,
+ debugPrompt,
+ debugResponse,
  trackUsage: config.trackUsage ?? parseBooleanEnv("VISUAL_AI_TRACK_USAGE", process.env.VISUAL_AI_TRACK_USAGE) ?? false
  };
  }
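
A configuration sketch exercising the new resolution rules (assumes `visualAI` is the exported factory):

    // Prompt/response logging is now opt-in via dedicated flags; setting only
    // VISUAL_AI_DEBUG prints a one-time deprecation warning instead.
    process.env.VISUAL_AI_DEBUG_PROMPT = "true";
    process.env.VISUAL_AI_DEBUG_RESPONSE = "true";

    // maxTokens is left unset here, so with reasoningEffort "high" on OpenAI
    // the resolver auto-raises it from 4096 to 16384.
    const ai = visualAI({ provider: "openai", reasoningEffort: "high" });
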
@@ -809,6 +900,10 @@ var PRICING_TABLE = {
  inputPricePerToken: 2 / PER_MILLION,
  outputPricePerToken: 12 / PER_MILLION
  },
+ [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_1_FLASH_LITE_PREVIEW}`]: {
+ inputPricePerToken: 0.25 / PER_MILLION,
+ outputPricePerToken: 1.5 / PER_MILLION
+ },
  [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_FLASH_PREVIEW}`]: {
  inputPricePerToken: 0.5 / PER_MILLION,
  outputPricePerToken: 3 / PER_MILLION
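
Worked example for the new entry: at $0.25 per million input tokens and $1.50 per million output tokens, a call using 1,000 input and 500 output tokens on gemini-3.1-flash-lite-preview is estimated at 1000 × 0.25/1e6 + 500 × 1.5/1e6 = $0.00025 + $0.00075 = $0.001.
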
@@ -822,8 +917,9 @@ function calculateCost(provider, model, inputTokens, outputTokens) {
  }

  // src/core/debug.ts
- function debugLog(config, label, data) {
- if (config.debug) {
+ function debugLog(config, label, data, kind = "error") {
+ const enabled = kind === "prompt" ? config.debugPrompt : kind === "response" ? config.debugResponse : config.debug;
+ if (enabled) {
  process.stderr.write(`[visual-ai-assertions] ${label}: ${data}
  `);
  }
@@ -831,8 +927,10 @@ function debugLog(config, label, data) {
  function usageLog(config, method, usage) {
  if (!config.trackUsage) return;
  const costStr = usage.estimatedCost !== void 0 ? `$${usage.estimatedCost.toFixed(6)}` : "unknown";
+ const reasoningStr = config.reasoningEffort ? `reasoning: ${config.reasoningEffort}` : `reasoning: ${PROVIDER_DEFAULT_REASONING[config.provider]} (provider default)`;
+ const reasoningTokenStr = usage.reasoningTokens !== void 0 ? ` (${usage.reasoningTokens} reasoning)` : "";
  process.stderr.write(
- `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}]
+ `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output${reasoningTokenStr} tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
  `
  );
  }
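
With trackUsage enabled, the expanded log line now looks like this (illustrative token counts and timing; the cost follows the pricing table above):

    [visual-ai-assertions] check usage: 1000 input + 500 output (256 reasoning) tokens ($0.002000) in 2.345s [gemini-3-flash-preview, reasoning: medium]
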
@@ -842,15 +940,42 @@ function processUsage(method, rawUsage, durationSeconds, config) {
  const usage = {
  inputTokens,
  outputTokens,
+ ...rawUsage?.reasoningTokens !== void 0 && { reasoningTokens: rawUsage.reasoningTokens },
  estimatedCost: calculateCost(config.provider, config.model, inputTokens, outputTokens),
  durationSeconds
  };
  usageLog(config, method, usage);
  return usage;
  }
- async function timedSendMessage(driver, images, prompt) {
+ var MAX_RAW_RESPONSE_PREVIEW = 500;
+ function formatError(error) {
+ if (error instanceof VisualAITruncationError) {
+ const preview = error.partialResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.partialResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.partialResponse;
+ return `${error.name} (${error.code}): ${error.message}. Partial response: ${preview}`;
+ }
+ if (error instanceof VisualAIResponseParseError) {
+ const truncated = error.rawResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.rawResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.rawResponse;
+ return `${error.name} (${error.code}): ${error.message}. Raw (truncated): ${truncated}`;
+ }
+ if (error instanceof VisualAIError) {
+ return `${error.name} (${error.code}): ${error.message}`;
+ }
+ if (error instanceof Error) {
+ return `${error.name}: ${error.message}`;
+ }
+ return String(error);
+ }
+ async function withErrorDebug(config, method, fn) {
+ try {
+ return await fn();
+ } catch (error) {
+ debugLog(config, `${method} error`, formatError(error), "error");
+ throw error;
+ }
+ }
+ async function timedSendMessage(driver, images, prompt, options) {
  const start = performance.now();
- const response = await driver.sendMessage(images, prompt);
+ const response = await driver.sendMessage(images, prompt, options);
  const durationSeconds = (performance.now() - start) / 1e3;
  return { ...response, durationSeconds };
  }
@@ -1090,6 +1215,8 @@ var StatementResultSchema = import_zod.z.object({
  var UsageInfoSchema = import_zod.z.object({
  inputTokens: import_zod.z.number(),
  outputTokens: import_zod.z.number(),
+ /** Reasoning/thinking tokens consumed by the model (informational, typically included within outputTokens). */
+ reasoningTokens: import_zod.z.number().optional(),
  estimatedCost: import_zod.z.number().optional(),
  durationSeconds: import_zod.z.number().nonnegative().optional()
  });
@@ -1143,8 +1270,24 @@ function parseResponse(raw, schema) {
  }
  return result.data;
  }
+ function reconcileCheckResult(result) {
+ if (result.statements.length === 0) {
+ return result;
+ }
+ const passCount = result.statements.filter((s) => s.pass).length;
+ const total = result.statements.length;
+ const computedPass = passCount === total;
+ const countPrefix = `${passCount} of ${total} checks passed`;
+ const reasoning = `${countPrefix}. ${result.reasoning}`;
+ return {
+ ...result,
+ pass: computedPass,
+ reasoning
+ };
+ }
  function parseCheckResponse(raw) {
- return parseResponse(raw, CheckResponseSchema);
+ const result = parseResponse(raw, CheckResponseSchema);
+ return reconcileCheckResult(result);
  }
  function parseAskResponse(raw) {
  return parseResponse(raw, AskResponseSchema);
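
reconcileCheckResult makes the top-level verdict authoritative even when the model reports it inconsistently. For example, given a parsed result (hypothetical, trimmed to the relevant fields):

    // Model output where "pass" contradicts the statements array:
    // { pass: true, reasoning: "The page header is missing.",
    //   statements: [ { statement: "Header is visible", pass: false },
    //                 { statement: "Footer is visible", pass: true } ] }
    //
    // After reconcileCheckResult:
    //   pass      -> false  (recomputed as the AND of the statement results)
    //   reasoning -> "1 of 2 checks passed. The page header is missing."
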
@@ -1154,6 +1297,12 @@ function parseCompareResponse(raw) {
  }

  // src/core/client.ts
+ var import_zod_to_json_schema = require("zod-to-json-schema");
+ function toSchemaOptions(schema) {
+ return {
+ responseSchema: (0, import_zod_to_json_schema.zodToJsonSchema)(schema, { target: "openAi" })
+ };
+ }
  var PROVIDER_REGISTRY = {
  anthropic: (config) => new AnthropicDriver(config),
  openai: (config) => new OpenAIDriver(config),
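
The responseSchema handed to the drivers comes from zod-to-json-schema's "openAi" target. A minimal sketch of the same conversion, using a toy schema rather than the package's real CheckResponseSchema:

    const { z } = require("zod");
    const { zodToJsonSchema } = require("zod-to-json-schema");

    const Toy = z.object({ pass: z.boolean(), reasoning: z.string() });
    // toSchemaOptions wraps the result as { responseSchema: ... }; the OpenAI
    // driver then embeds it under json_schema.schema with strict: true.
    const responseSchema = zodToJsonSchema(Toy, { target: "openAi" });
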
@@ -1162,6 +1311,9 @@ var PROVIDER_REGISTRY = {
  function createDriver(provider, config) {
  return PROVIDER_REGISTRY[provider](config);
  }
+ var checkSchemaOptions = toSchemaOptions(CheckResponseSchema);
+ var askSchemaOptions = toSchemaOptions(AskResponseSchema);
+ var compareSchemaOptions = toSchemaOptions(CompareResponseSchema);
  function visualAI(config = {}) {
  const resolvedConfig = resolveConfig(config);
  const driverConfig = {
@@ -1176,16 +1328,18 @@ function visualAI(config = {}) {
  if (elements.length === 0) {
  throw new VisualAIConfigError(`At least one element is required for ${methodName}()`);
  }
- const img = await normalizeImage(image);
- const prompt = buildElementsVisibilityPrompt(elements, visible, options);
- debugLog(resolvedConfig, `${methodName} prompt`, prompt);
- const response = await timedSendMessage(driver, [img], prompt);
- debugLog(resolvedConfig, `${methodName} response`, response.text);
- const result = parseCheckResponse(response.text);
- return {
- ...result,
- usage: processUsage(methodName, response.usage, response.durationSeconds, resolvedConfig)
- };
+ return withErrorDebug(resolvedConfig, methodName, async () => {
+ const img = await normalizeImage(image);
+ const prompt = buildElementsVisibilityPrompt(elements, visible, options);
+ debugLog(resolvedConfig, `${methodName} prompt`, prompt, "prompt");
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
+ debugLog(resolvedConfig, `${methodName} response`, response.text, "response");
+ const result = parseCheckResponse(response.text);
+ return {
+ ...result,
+ usage: processUsage(methodName, response.usage, response.durationSeconds, resolvedConfig)
+ };
+ });
  }
  return {
  async check(image, statements, options) {
@@ -1193,61 +1347,64 @@ function visualAI(config = {}) {
  if (stmts.length === 0) {
  throw new VisualAIConfigError("At least one statement is required for check()");
  }
- const img = await normalizeImage(image);
- const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
- debugLog(resolvedConfig, "check prompt", prompt);
- const response = await timedSendMessage(driver, [img], prompt);
- debugLog(resolvedConfig, "check response", response.text);
- const result = parseCheckResponse(response.text);
- return {
- ...result,
- usage: processUsage("check", response.usage, response.durationSeconds, resolvedConfig)
- };
+ return withErrorDebug(resolvedConfig, "check", async () => {
+ const img = await normalizeImage(image);
+ const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
+ debugLog(resolvedConfig, "check prompt", prompt, "prompt");
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
+ debugLog(resolvedConfig, "check response", response.text, "response");
+ const result = parseCheckResponse(response.text);
+ return {
+ ...result,
+ usage: processUsage("check", response.usage, response.durationSeconds, resolvedConfig)
+ };
+ });
  },
  async ask(image, userPrompt, options) {
- const img = await normalizeImage(image);
- const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
- debugLog(resolvedConfig, "ask prompt", prompt);
- const response = await timedSendMessage(driver, [img], prompt);
- debugLog(resolvedConfig, "ask response", response.text);
- const result = parseAskResponse(response.text);
- return {
- ...result,
- usage: processUsage("ask", response.usage, response.durationSeconds, resolvedConfig)
- };
+ return withErrorDebug(resolvedConfig, "ask", async () => {
+ const img = await normalizeImage(image);
+ const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
+ debugLog(resolvedConfig, "ask prompt", prompt, "prompt");
+ const response = await timedSendMessage(driver, [img], prompt, askSchemaOptions);
+ debugLog(resolvedConfig, "ask response", response.text, "response");
+ const result = parseAskResponse(response.text);
+ return {
+ ...result,
+ usage: processUsage("ask", response.usage, response.durationSeconds, resolvedConfig)
+ };
+ });
  },
  async compare(imageA, imageB, options) {
- const [imgA, imgB] = await Promise.all([normalizeImage(imageA), normalizeImage(imageB)]);
- const prompt = buildComparePrompt({
- userPrompt: options?.prompt,
- instructions: options?.instructions
- });
- debugLog(resolvedConfig, "compare prompt", prompt);
- const response = await timedSendMessage(driver, [imgA, imgB], prompt);
- debugLog(resolvedConfig, "compare response", response.text);
- const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
- const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
- let diffImage;
- if (effectiveDiffImage) {
- try {
- diffImage = await generateAiDiff(imgA, imgB, resolvedConfig.model, driver);
- } catch (err) {
- const msg = err instanceof Error ? err.message : String(err);
- debugLog(resolvedConfig, "ai diff error", msg);
- if (!resolvedConfig.debug) {
+ return withErrorDebug(resolvedConfig, "compare", async () => {
+ const [imgA, imgB] = await Promise.all([normalizeImage(imageA), normalizeImage(imageB)]);
+ const prompt = buildComparePrompt({
+ userPrompt: options?.prompt,
+ instructions: options?.instructions
+ });
+ debugLog(resolvedConfig, "compare prompt", prompt, "prompt");
+ const response = await timedSendMessage(driver, [imgA, imgB], prompt, compareSchemaOptions);
+ debugLog(resolvedConfig, "compare response", response.text, "response");
+ const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
+ const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
+ let diffImage;
+ if (effectiveDiffImage) {
+ try {
+ diffImage = await generateAiDiff(imgA, imgB, resolvedConfig.model, driver);
+ } catch (err) {
+ const msg = err instanceof Error ? err.message : String(err);
  process.stderr.write(
  `[visual-ai-assertions] warning: diff generation failed: ${msg}
  `
  );
  }
  }
- }
- const result = parseCompareResponse(response.text);
- return {
- ...result,
- ...diffImage ? { diffImage } : {},
- usage: processUsage("compare", response.usage, response.durationSeconds, resolvedConfig)
- };
+ const result = parseCompareResponse(response.text);
+ return {
+ ...result,
+ ...diffImage ? { diffImage } : {},
+ usage: processUsage("compare", response.usage, response.durationSeconds, resolvedConfig)
+ };
+ });
  },
  elementsVisible(image, elements, options) {
  return checkElementsVisibility(image, elements, true, options);
@@ -1256,57 +1413,65 @@ function visualAI(config = {}) {
  return checkElementsVisibility(image, elements, false, options);
  },
  async accessibility(image, options) {
- const img = await normalizeImage(image);
- const prompt = buildAccessibilityPrompt(options);
- debugLog(resolvedConfig, "accessibility prompt", prompt);
- const response = await timedSendMessage(driver, [img], prompt);
- debugLog(resolvedConfig, "accessibility response", response.text);
- const result = parseCheckResponse(response.text);
- return {
- ...result,
- usage: processUsage(
- "accessibility",
- response.usage,
- response.durationSeconds,
- resolvedConfig
- )
- };
+ return withErrorDebug(resolvedConfig, "accessibility", async () => {
+ const img = await normalizeImage(image);
+ const prompt = buildAccessibilityPrompt(options);
+ debugLog(resolvedConfig, "accessibility prompt", prompt, "prompt");
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
+ debugLog(resolvedConfig, "accessibility response", response.text, "response");
+ const result = parseCheckResponse(response.text);
+ return {
+ ...result,
+ usage: processUsage(
+ "accessibility",
+ response.usage,
+ response.durationSeconds,
+ resolvedConfig
+ )
+ };
+ });
  },
  async layout(image, options) {
- const img = await normalizeImage(image);
- const prompt = buildLayoutPrompt(options);
- debugLog(resolvedConfig, "layout prompt", prompt);
- const response = await timedSendMessage(driver, [img], prompt);
- debugLog(resolvedConfig, "layout response", response.text);
- const result = parseCheckResponse(response.text);
- return {
- ...result,
- usage: processUsage("layout", response.usage, response.durationSeconds, resolvedConfig)
- };
+ return withErrorDebug(resolvedConfig, "layout", async () => {
+ const img = await normalizeImage(image);
+ const prompt = buildLayoutPrompt(options);
+ debugLog(resolvedConfig, "layout prompt", prompt, "prompt");
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
+ debugLog(resolvedConfig, "layout response", response.text, "response");
+ const result = parseCheckResponse(response.text);
+ return {
+ ...result,
+ usage: processUsage("layout", response.usage, response.durationSeconds, resolvedConfig)
+ };
+ });
  },
  async pageLoad(image, options) {
- const img = await normalizeImage(image);
- const prompt = buildPageLoadPrompt(options);
- debugLog(resolvedConfig, "pageLoad prompt", prompt);
- const response = await timedSendMessage(driver, [img], prompt);
- debugLog(resolvedConfig, "pageLoad response", response.text);
- const result = parseCheckResponse(response.text);
- return {
- ...result,
- usage: processUsage("pageLoad", response.usage, response.durationSeconds, resolvedConfig)
- };
+ return withErrorDebug(resolvedConfig, "pageLoad", async () => {
+ const img = await normalizeImage(image);
+ const prompt = buildPageLoadPrompt(options);
+ debugLog(resolvedConfig, "pageLoad prompt", prompt, "prompt");
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
+ debugLog(resolvedConfig, "pageLoad response", response.text, "response");
+ const result = parseCheckResponse(response.text);
+ return {
+ ...result,
+ usage: processUsage("pageLoad", response.usage, response.durationSeconds, resolvedConfig)
+ };
+ });
  },
  async content(image, options) {
- const img = await normalizeImage(image);
- const prompt = buildContentPrompt(options);
- debugLog(resolvedConfig, "content prompt", prompt);
- const response = await timedSendMessage(driver, [img], prompt);
- debugLog(resolvedConfig, "content response", response.text);
- const result = parseCheckResponse(response.text);
- return {
- ...result,
- usage: processUsage("content", response.usage, response.durationSeconds, resolvedConfig)
- };
+ return withErrorDebug(resolvedConfig, "content", async () => {
+ const img = await normalizeImage(image);
+ const prompt = buildContentPrompt(options);
+ debugLog(resolvedConfig, "content prompt", prompt, "prompt");
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
+ debugLog(resolvedConfig, "content response", response.text, "response");
+ const result = parseCheckResponse(response.text);
+ return {
+ ...result,
+ usage: processUsage("content", response.usage, response.durationSeconds, resolvedConfig)
+ };
+ });
  }
  };
  }
@@ -1391,6 +1556,7 @@ function assertVisualCompareResult(result, label) {
  VisualAIProviderError,
  VisualAIRateLimitError,
  VisualAIResponseParseError,
+ VisualAITruncationError,
  assertVisualCompareResult,
  assertVisualResult,
  formatCheckResult,