visual-ai-assertions 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -20,6 +20,7 @@ var Model = {
20
20
  },
21
21
  Google: {
22
22
  GEMINI_3_1_PRO_PREVIEW: "gemini-3.1-pro-preview",
23
+ GEMINI_3_1_FLASH_LITE_PREVIEW: "gemini-3.1-flash-lite-preview",
23
24
  GEMINI_3_FLASH_PREVIEW: "gemini-3-flash-preview"
24
25
  }
25
26
  };
@@ -29,12 +30,18 @@ var DEFAULT_MODELS = {
29
30
  [Provider.GOOGLE]: Model.Google.GEMINI_3_FLASH_PREVIEW
30
31
  };
31
32
  var DEFAULT_MAX_TOKENS = 4096;
33
+ var OPENAI_REASONING_MAX_TOKENS = 16384;
32
34
  var MODEL_TO_PROVIDER = new Map([
33
35
  ...Object.values(Model.Anthropic).map((m) => [m, Provider.ANTHROPIC]),
34
36
  ...Object.values(Model.OpenAI).map((m) => [m, Provider.OPENAI]),
35
37
  ...Object.values(Model.Google).map((m) => [m, Provider.GOOGLE])
36
38
  ]);
37
39
  var VALID_PROVIDERS = Object.values(Provider);
40
+ var PROVIDER_DEFAULT_REASONING = {
41
+ openai: "medium",
42
+ anthropic: "off",
43
+ google: "off"
44
+ };
38
45
  var Content = {
39
46
  /** Detects Lorem ipsum, TODO, TBD, and similar placeholder text */
40
47
  PLACEHOLDER_TEXT: "placeholder-text",
@@ -108,6 +115,16 @@ var VisualAIResponseParseError = class extends VisualAIError {
108
115
  this.rawResponse = rawResponse;
109
116
  }
110
117
  };
118
+ var VisualAITruncationError = class extends VisualAIError {
119
+ partialResponse;
120
+ maxTokens;
121
+ constructor(message, partialResponse, maxTokens) {
122
+ super(message, "RESPONSE_TRUNCATED");
123
+ this.name = "VisualAITruncationError";
124
+ this.partialResponse = partialResponse;
125
+ this.maxTokens = maxTokens;
126
+ }
127
+ };
111
128
  var VisualAIConfigError = class extends VisualAIError {
112
129
  constructor(message) {
113
130
  super(message, "CONFIG_INVALID");
@@ -123,7 +140,7 @@ var VisualAIAssertionError = class extends VisualAIError {
123
140
  }
124
141
  };
125
142
  function isVisualAIKnownError(error) {
126
- return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
143
+ return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAITruncationError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
127
144
  }
128
145
 
129
146
  // src/core/prompt.ts
@@ -137,12 +154,18 @@ Each issue must have:
137
154
  - "description": what the issue is
138
155
  - "suggestion": how to fix or improve it
139
156
  `;
140
- var CHECK_OUTPUT_SCHEMA = `Respond with a JSON object matching this exact structure:
157
+ var CHECK_OUTPUT_SCHEMA = `IMPORTANT: Follow this evaluation order:
158
+ 1. First, evaluate EACH statement independently and populate the "statements" array
159
+ 2. Then, set "pass" to true ONLY if every statement passed (logical AND of all statement results)
160
+ 3. Write "reasoning" as a brief overall summary of the evaluation
161
+ 4. Include "issues" only for statements that failed
162
+
163
+ Respond with a JSON object matching this exact structure:
141
164
  {
142
- "pass": boolean, // true ONLY if ALL statements are true
143
- "reasoning": string, // brief overall summary (e.g. "3 of 4 checks passed...")
144
- "issues": [...], // list of issues found (empty if all pass)
145
- "statements": [ // one entry per statement, in order
165
+ "pass": boolean, // true ONLY if ALL statements passed \u2014 derive from statements array
166
+ "reasoning": string, // brief overall summary of the evaluation
167
+ "issues": [...], // one issue per failing statement (empty if all pass)
168
+ "statements": [ // one entry per statement, in order \u2014 evaluate these FIRST
146
169
  {
147
170
  "statement": string, // the original statement text
148
171
  "pass": boolean, // whether this statement is true
@@ -161,7 +184,7 @@ Only include issues for statements that fail. If all statements pass, issues sho
161
184
  Example for a failing check:
162
185
  {
163
186
  "pass": false,
164
- "reasoning": "1 of 2 checks failed. The submit button is not visible.",
187
+ "reasoning": "The submit button is not visible on the page.",
165
188
  "issues": [
166
189
  { "priority": "major", "category": "missing-element", "description": "Submit button is not visible on the page", "suggestion": "Verify the submit button component is rendered and not hidden by CSS" }
167
190
  ],
@@ -421,7 +444,7 @@ var AnthropicDriver = class {
421
444
  this.client = new Anthropic({ apiKey });
422
445
  return this.client;
423
446
  }
424
- async sendMessage(images, prompt) {
447
+ async sendMessage(images, prompt, _options) {
425
448
  const client = await this.getClient();
426
449
  const imageBlocks = images.map((img) => ({
427
450
  type: "image",
@@ -451,6 +474,13 @@ var AnthropicDriver = class {
451
474
  const message = await client.messages.create(requestParams);
452
475
  const textBlock = message.content.find((block) => block.type === "text");
453
476
  const text = textBlock?.text ?? "";
477
+ if (message.stop_reason === "max_tokens") {
478
+ throw new VisualAITruncationError(
479
+ `Response truncated: Anthropic stopped due to max_tokens limit (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
480
+ text,
481
+ this.maxTokens
482
+ );
483
+ }
454
484
  return {
455
485
  text,
456
486
  usage: {
@@ -459,6 +489,7 @@ var AnthropicDriver = class {
459
489
  }
460
490
  };
461
491
  } catch (err) {
492
+ if (err instanceof VisualAITruncationError) throw err;
462
493
  throw mapProviderError(err);
463
494
  }
464
495
  }
@@ -470,11 +501,11 @@ function needsCodeExecution(model) {
470
501
  const match = model.match(/^gemini-(\d+)/);
471
502
  return match !== null && match[1] !== void 0 && parseInt(match[1], 10) >= 3;
472
503
  }
473
- var GOOGLE_THINKING_BUDGET = {
474
- low: 1024,
475
- medium: 8192,
476
- high: 24576,
477
- xhigh: 24576
504
+ var GOOGLE_THINKING_LEVEL = {
505
+ low: "minimal",
506
+ medium: "low",
507
+ high: "medium",
508
+ xhigh: "high"
478
509
  };
479
510
  var GoogleDriver = class {
480
511
  client;
@@ -514,7 +545,7 @@ var GoogleDriver = class {
514
545
  this.client = new GoogleGenAI({ apiKey });
515
546
  return this.client;
516
547
  }
517
- async sendMessage(images, prompt) {
548
+ async sendMessage(images, prompt, _options) {
518
549
  const client = await this.getClient();
519
550
  try {
520
551
  const response = await client.models.generateContent({
@@ -525,20 +556,36 @@ var GoogleDriver = class {
525
556
  maxOutputTokens: this.maxTokens,
526
557
  ...this.reasoningEffort && {
527
558
  thinkingConfig: {
528
- thinkingBudget: GOOGLE_THINKING_BUDGET[this.reasoningEffort]
559
+ thinkingLevel: GOOGLE_THINKING_LEVEL[this.reasoningEffort]
529
560
  }
530
561
  }
531
562
  }
532
563
  });
564
+ const finishReason = response.candidates?.[0]?.finishReason;
565
+ if (finishReason === "MAX_TOKENS") {
566
+ throw new VisualAITruncationError(
567
+ `Response truncated: Google returned finishReason "MAX_TOKENS". The model exhausted the output token budget (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
568
+ response.text ?? "",
569
+ this.maxTokens
570
+ );
571
+ }
572
+ if (finishReason && finishReason !== "STOP") {
573
+ throw new VisualAIProviderError(
574
+ `Response blocked: Google returned finishReason "${finishReason}".`
575
+ );
576
+ }
533
577
  const text = response.text ?? "";
578
+ const thoughtsTokenCount = response.usageMetadata?.thoughtsTokenCount;
534
579
  return {
535
580
  text,
536
581
  usage: response.usageMetadata ? {
537
582
  inputTokens: response.usageMetadata.promptTokenCount ?? 0,
538
- outputTokens: response.usageMetadata.candidatesTokenCount ?? 0
583
+ outputTokens: response.usageMetadata.candidatesTokenCount ?? 0,
584
+ ...thoughtsTokenCount !== void 0 && { reasoningTokens: thoughtsTokenCount }
539
585
  } : void 0
540
586
  };
541
587
  } catch (err) {
588
+ if (err instanceof VisualAITruncationError || err instanceof VisualAIProviderError) throw err;
542
589
  throw mapProviderError(err);
543
590
  }
544
591
  }
@@ -610,17 +657,25 @@ var OpenAIDriver = class {
610
657
  this.client = new OpenAI({ apiKey });
611
658
  return this.client;
612
659
  }
613
- async sendMessage(images, prompt) {
660
+ async sendMessage(images, prompt, options) {
614
661
  const client = await this.getClient();
615
662
  const imageBlocks = images.map((img) => ({
616
663
  type: "input_image",
617
664
  image_url: `data:${img.mimeType};base64,${img.base64}`
618
665
  }));
619
666
  try {
667
+ const format = options?.responseSchema ? {
668
+ type: "json_schema",
669
+ json_schema: {
670
+ name: "visual_ai_response",
671
+ strict: true,
672
+ schema: options.responseSchema
673
+ }
674
+ } : { type: "json_object" };
620
675
  const requestParams = {
621
676
  model: this.model,
622
677
  max_output_tokens: this.maxTokens,
623
- text: { format: { type: "json_object" } },
678
+ text: { format },
624
679
  input: [
625
680
  {
626
681
  role: "user",
@@ -632,15 +687,26 @@ var OpenAIDriver = class {
632
687
  requestParams.reasoning = { effort: this.reasoningEffort };
633
688
  }
634
689
  const response = await client.responses.create(requestParams);
690
+ if (response.status && response.status !== "completed") {
691
+ const detail = response.incomplete_details?.reason ? ` (${response.incomplete_details.reason})` : "";
692
+ throw new VisualAITruncationError(
693
+ `Response truncated: OpenAI returned status "${response.status}"${detail}. The model exhausted the output token budget (${this.maxTokens} tokens). This commonly happens with higher reasoning effort levels. Increase maxTokens in your config (e.g., maxTokens: 16384) or lower reasoningEffort.`,
694
+ response.output_text ?? "",
695
+ this.maxTokens
696
+ );
697
+ }
635
698
  const text = response.output_text ?? "";
699
+ const reasoningTokens = response.usage?.output_tokens_details?.reasoning_tokens;
636
700
  return {
637
701
  text,
638
702
  usage: response.usage ? {
639
703
  inputTokens: response.usage.input_tokens,
640
- outputTokens: response.usage.output_tokens
704
+ outputTokens: response.usage.output_tokens,
705
+ ...reasoningTokens !== void 0 && { reasoningTokens }
641
706
  } : void 0
642
707
  };
643
708
  } catch (err) {
709
+ if (err instanceof VisualAITruncationError) throw err;
644
710
  throw mapProviderError(err);
645
711
  }
646
712
  }
@@ -687,16 +753,40 @@ function parseBooleanEnv(envName, value) {
687
753
  `Invalid ${envName} value: "${value}". Use "true", "1", "false", or "0".`
688
754
  );
689
755
  }
756
+ var debugDeprecationWarned = false;
690
757
  function resolveConfig(config) {
691
758
  const provider = resolveProvider(config);
692
759
  const model = config.model ?? process.env.VISUAL_AI_MODEL ?? DEFAULT_MODELS[provider];
760
+ const debug = config.debug ?? parseBooleanEnv("VISUAL_AI_DEBUG", process.env.VISUAL_AI_DEBUG) ?? false;
761
+ const debugPrompt = config.debugPrompt ?? parseBooleanEnv("VISUAL_AI_DEBUG_PROMPT", process.env.VISUAL_AI_DEBUG_PROMPT) ?? false;
762
+ const debugResponse = config.debugResponse ?? parseBooleanEnv("VISUAL_AI_DEBUG_RESPONSE", process.env.VISUAL_AI_DEBUG_RESPONSE) ?? false;
763
+ if (debug && !debugPrompt && !debugResponse && !debugDeprecationWarned) {
764
+ debugDeprecationWarned = true;
765
+ process.stderr.write(
766
+ `[visual-ai-assertions] Warning: VISUAL_AI_DEBUG no longer enables prompt/response logging. Use VISUAL_AI_DEBUG_PROMPT=true and/or VISUAL_AI_DEBUG_RESPONSE=true instead.
767
+ `
768
+ );
769
+ }
770
+ const userSetMaxTokens = config.maxTokens !== void 0;
771
+ let maxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
772
+ if (!userSetMaxTokens && provider === "openai" && (config.reasoningEffort === "high" || config.reasoningEffort === "xhigh")) {
773
+ maxTokens = OPENAI_REASONING_MAX_TOKENS;
774
+ if (debug) {
775
+ process.stderr.write(
776
+ `[visual-ai-assertions] Auto-increased maxTokens from ${DEFAULT_MAX_TOKENS} to ${OPENAI_REASONING_MAX_TOKENS} for OpenAI with reasoningEffort "${config.reasoningEffort}".
777
+ `
778
+ );
779
+ }
780
+ }
693
781
  return {
694
782
  provider,
695
783
  apiKey: config.apiKey,
696
784
  model,
697
- maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
785
+ maxTokens,
698
786
  reasoningEffort: config.reasoningEffort,
699
- debug: config.debug ?? parseBooleanEnv("VISUAL_AI_DEBUG", process.env.VISUAL_AI_DEBUG) ?? false,
787
+ debug,
788
+ debugPrompt,
789
+ debugResponse,
700
790
  trackUsage: config.trackUsage ?? parseBooleanEnv("VISUAL_AI_TRACK_USAGE", process.env.VISUAL_AI_TRACK_USAGE) ?? false
701
791
  };
702
792
  }
@@ -744,6 +834,10 @@ var PRICING_TABLE = {
744
834
  inputPricePerToken: 2 / PER_MILLION,
745
835
  outputPricePerToken: 12 / PER_MILLION
746
836
  },
837
+ [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_1_FLASH_LITE_PREVIEW}`]: {
838
+ inputPricePerToken: 0.25 / PER_MILLION,
839
+ outputPricePerToken: 1.5 / PER_MILLION
840
+ },
747
841
  [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_FLASH_PREVIEW}`]: {
748
842
  inputPricePerToken: 0.5 / PER_MILLION,
749
843
  outputPricePerToken: 3 / PER_MILLION
@@ -757,8 +851,9 @@ function calculateCost(provider, model, inputTokens, outputTokens) {
757
851
  }
758
852
 
759
853
  // src/core/debug.ts
760
- function debugLog(config, label, data) {
761
- if (config.debug) {
854
+ function debugLog(config, label, data, kind = "error") {
855
+ const enabled = kind === "prompt" ? config.debugPrompt : kind === "response" ? config.debugResponse : config.debug;
856
+ if (enabled) {
762
857
  process.stderr.write(`[visual-ai-assertions] ${label}: ${data}
763
858
  `);
764
859
  }
@@ -766,8 +861,10 @@ function debugLog(config, label, data) {
766
861
  function usageLog(config, method, usage) {
767
862
  if (!config.trackUsage) return;
768
863
  const costStr = usage.estimatedCost !== void 0 ? `$${usage.estimatedCost.toFixed(6)}` : "unknown";
864
+ const reasoningStr = config.reasoningEffort ? `reasoning: ${config.reasoningEffort}` : `reasoning: ${PROVIDER_DEFAULT_REASONING[config.provider]} (provider default)`;
865
+ const reasoningTokenStr = usage.reasoningTokens !== void 0 ? ` (${usage.reasoningTokens} reasoning)` : "";
769
866
  process.stderr.write(
770
- `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}]
867
+ `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output${reasoningTokenStr} tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
771
868
  `
772
869
  );
773
870
  }
@@ -777,15 +874,42 @@ function processUsage(method, rawUsage, durationSeconds, config) {
777
874
  const usage = {
778
875
  inputTokens,
779
876
  outputTokens,
877
+ ...rawUsage?.reasoningTokens !== void 0 && { reasoningTokens: rawUsage.reasoningTokens },
780
878
  estimatedCost: calculateCost(config.provider, config.model, inputTokens, outputTokens),
781
879
  durationSeconds
782
880
  };
783
881
  usageLog(config, method, usage);
784
882
  return usage;
785
883
  }
786
- async function timedSendMessage(driver, images, prompt) {
884
+ var MAX_RAW_RESPONSE_PREVIEW = 500;
885
+ function formatError(error) {
886
+ if (error instanceof VisualAITruncationError) {
887
+ const preview = error.partialResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.partialResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.partialResponse;
888
+ return `${error.name} (${error.code}): ${error.message}. Partial response: ${preview}`;
889
+ }
890
+ if (error instanceof VisualAIResponseParseError) {
891
+ const truncated = error.rawResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.rawResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.rawResponse;
892
+ return `${error.name} (${error.code}): ${error.message}. Raw (truncated): ${truncated}`;
893
+ }
894
+ if (error instanceof VisualAIError) {
895
+ return `${error.name} (${error.code}): ${error.message}`;
896
+ }
897
+ if (error instanceof Error) {
898
+ return `${error.name}: ${error.message}`;
899
+ }
900
+ return String(error);
901
+ }
902
+ async function withErrorDebug(config, method, fn) {
903
+ try {
904
+ return await fn();
905
+ } catch (error) {
906
+ debugLog(config, `${method} error`, formatError(error), "error");
907
+ throw error;
908
+ }
909
+ }
910
+ async function timedSendMessage(driver, images, prompt, options) {
787
911
  const start = performance.now();
788
- const response = await driver.sendMessage(images, prompt);
912
+ const response = await driver.sendMessage(images, prompt, options);
789
913
  const durationSeconds = (performance.now() - start) / 1e3;
790
914
  return { ...response, durationSeconds };
791
915
  }
@@ -1025,6 +1149,8 @@ var StatementResultSchema = z.object({
1025
1149
  var UsageInfoSchema = z.object({
1026
1150
  inputTokens: z.number(),
1027
1151
  outputTokens: z.number(),
1152
+ /** Reasoning/thinking tokens consumed by the model (informational, typically included within outputTokens). */
1153
+ reasoningTokens: z.number().optional(),
1028
1154
  estimatedCost: z.number().optional(),
1029
1155
  durationSeconds: z.number().nonnegative().optional()
1030
1156
  });
@@ -1078,8 +1204,24 @@ function parseResponse(raw, schema) {
1078
1204
  }
1079
1205
  return result.data;
1080
1206
  }
1207
+ function reconcileCheckResult(result) {
1208
+ if (result.statements.length === 0) {
1209
+ return result;
1210
+ }
1211
+ const passCount = result.statements.filter((s) => s.pass).length;
1212
+ const total = result.statements.length;
1213
+ const computedPass = passCount === total;
1214
+ const countPrefix = `${passCount} of ${total} checks passed`;
1215
+ const reasoning = `${countPrefix}. ${result.reasoning}`;
1216
+ return {
1217
+ ...result,
1218
+ pass: computedPass,
1219
+ reasoning
1220
+ };
1221
+ }
1081
1222
  function parseCheckResponse(raw) {
1082
- return parseResponse(raw, CheckResponseSchema);
1223
+ const result = parseResponse(raw, CheckResponseSchema);
1224
+ return reconcileCheckResult(result);
1083
1225
  }
1084
1226
  function parseAskResponse(raw) {
1085
1227
  return parseResponse(raw, AskResponseSchema);
@@ -1089,6 +1231,12 @@ function parseCompareResponse(raw) {
1089
1231
  }
1090
1232
 
1091
1233
  // src/core/client.ts
1234
+ import { zodToJsonSchema } from "zod-to-json-schema";
1235
+ function toSchemaOptions(schema) {
1236
+ return {
1237
+ responseSchema: zodToJsonSchema(schema, { target: "openAi" })
1238
+ };
1239
+ }
1092
1240
  var PROVIDER_REGISTRY = {
1093
1241
  anthropic: (config) => new AnthropicDriver(config),
1094
1242
  openai: (config) => new OpenAIDriver(config),
@@ -1097,6 +1245,9 @@ var PROVIDER_REGISTRY = {
1097
1245
  function createDriver(provider, config) {
1098
1246
  return PROVIDER_REGISTRY[provider](config);
1099
1247
  }
1248
+ var checkSchemaOptions = toSchemaOptions(CheckResponseSchema);
1249
+ var askSchemaOptions = toSchemaOptions(AskResponseSchema);
1250
+ var compareSchemaOptions = toSchemaOptions(CompareResponseSchema);
1100
1251
  function visualAI(config = {}) {
1101
1252
  const resolvedConfig = resolveConfig(config);
1102
1253
  const driverConfig = {
@@ -1111,16 +1262,18 @@ function visualAI(config = {}) {
1111
1262
  if (elements.length === 0) {
1112
1263
  throw new VisualAIConfigError(`At least one element is required for ${methodName}()`);
1113
1264
  }
1114
- const img = await normalizeImage(image);
1115
- const prompt = buildElementsVisibilityPrompt(elements, visible, options);
1116
- debugLog(resolvedConfig, `${methodName} prompt`, prompt);
1117
- const response = await timedSendMessage(driver, [img], prompt);
1118
- debugLog(resolvedConfig, `${methodName} response`, response.text);
1119
- const result = parseCheckResponse(response.text);
1120
- return {
1121
- ...result,
1122
- usage: processUsage(methodName, response.usage, response.durationSeconds, resolvedConfig)
1123
- };
1265
+ return withErrorDebug(resolvedConfig, methodName, async () => {
1266
+ const img = await normalizeImage(image);
1267
+ const prompt = buildElementsVisibilityPrompt(elements, visible, options);
1268
+ debugLog(resolvedConfig, `${methodName} prompt`, prompt, "prompt");
1269
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
1270
+ debugLog(resolvedConfig, `${methodName} response`, response.text, "response");
1271
+ const result = parseCheckResponse(response.text);
1272
+ return {
1273
+ ...result,
1274
+ usage: processUsage(methodName, response.usage, response.durationSeconds, resolvedConfig)
1275
+ };
1276
+ });
1124
1277
  }
1125
1278
  return {
1126
1279
  async check(image, statements, options) {
@@ -1128,61 +1281,64 @@ function visualAI(config = {}) {
1128
1281
  if (stmts.length === 0) {
1129
1282
  throw new VisualAIConfigError("At least one statement is required for check()");
1130
1283
  }
1131
- const img = await normalizeImage(image);
1132
- const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
1133
- debugLog(resolvedConfig, "check prompt", prompt);
1134
- const response = await timedSendMessage(driver, [img], prompt);
1135
- debugLog(resolvedConfig, "check response", response.text);
1136
- const result = parseCheckResponse(response.text);
1137
- return {
1138
- ...result,
1139
- usage: processUsage("check", response.usage, response.durationSeconds, resolvedConfig)
1140
- };
1284
+ return withErrorDebug(resolvedConfig, "check", async () => {
1285
+ const img = await normalizeImage(image);
1286
+ const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
1287
+ debugLog(resolvedConfig, "check prompt", prompt, "prompt");
1288
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
1289
+ debugLog(resolvedConfig, "check response", response.text, "response");
1290
+ const result = parseCheckResponse(response.text);
1291
+ return {
1292
+ ...result,
1293
+ usage: processUsage("check", response.usage, response.durationSeconds, resolvedConfig)
1294
+ };
1295
+ });
1141
1296
  },
1142
1297
  async ask(image, userPrompt, options) {
1143
- const img = await normalizeImage(image);
1144
- const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
1145
- debugLog(resolvedConfig, "ask prompt", prompt);
1146
- const response = await timedSendMessage(driver, [img], prompt);
1147
- debugLog(resolvedConfig, "ask response", response.text);
1148
- const result = parseAskResponse(response.text);
1149
- return {
1150
- ...result,
1151
- usage: processUsage("ask", response.usage, response.durationSeconds, resolvedConfig)
1152
- };
1298
+ return withErrorDebug(resolvedConfig, "ask", async () => {
1299
+ const img = await normalizeImage(image);
1300
+ const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
1301
+ debugLog(resolvedConfig, "ask prompt", prompt, "prompt");
1302
+ const response = await timedSendMessage(driver, [img], prompt, askSchemaOptions);
1303
+ debugLog(resolvedConfig, "ask response", response.text, "response");
1304
+ const result = parseAskResponse(response.text);
1305
+ return {
1306
+ ...result,
1307
+ usage: processUsage("ask", response.usage, response.durationSeconds, resolvedConfig)
1308
+ };
1309
+ });
1153
1310
  },
1154
1311
  async compare(imageA, imageB, options) {
1155
- const [imgA, imgB] = await Promise.all([normalizeImage(imageA), normalizeImage(imageB)]);
1156
- const prompt = buildComparePrompt({
1157
- userPrompt: options?.prompt,
1158
- instructions: options?.instructions
1159
- });
1160
- debugLog(resolvedConfig, "compare prompt", prompt);
1161
- const response = await timedSendMessage(driver, [imgA, imgB], prompt);
1162
- debugLog(resolvedConfig, "compare response", response.text);
1163
- const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
1164
- const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
1165
- let diffImage;
1166
- if (effectiveDiffImage) {
1167
- try {
1168
- diffImage = await generateAiDiff(imgA, imgB, resolvedConfig.model, driver);
1169
- } catch (err) {
1170
- const msg = err instanceof Error ? err.message : String(err);
1171
- debugLog(resolvedConfig, "ai diff error", msg);
1172
- if (!resolvedConfig.debug) {
1312
+ return withErrorDebug(resolvedConfig, "compare", async () => {
1313
+ const [imgA, imgB] = await Promise.all([normalizeImage(imageA), normalizeImage(imageB)]);
1314
+ const prompt = buildComparePrompt({
1315
+ userPrompt: options?.prompt,
1316
+ instructions: options?.instructions
1317
+ });
1318
+ debugLog(resolvedConfig, "compare prompt", prompt, "prompt");
1319
+ const response = await timedSendMessage(driver, [imgA, imgB], prompt, compareSchemaOptions);
1320
+ debugLog(resolvedConfig, "compare response", response.text, "response");
1321
+ const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
1322
+ const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
1323
+ let diffImage;
1324
+ if (effectiveDiffImage) {
1325
+ try {
1326
+ diffImage = await generateAiDiff(imgA, imgB, resolvedConfig.model, driver);
1327
+ } catch (err) {
1328
+ const msg = err instanceof Error ? err.message : String(err);
1173
1329
  process.stderr.write(
1174
1330
  `[visual-ai-assertions] warning: diff generation failed: ${msg}
1175
1331
  `
1176
1332
  );
1177
1333
  }
1178
1334
  }
1179
- }
1180
- const result = parseCompareResponse(response.text);
1181
- return {
1182
- ...result,
1183
- ...diffImage ? { diffImage } : {},
1184
- usage: processUsage("compare", response.usage, response.durationSeconds, resolvedConfig)
1185
- };
1335
+ const result = parseCompareResponse(response.text);
1336
+ return {
1337
+ ...result,
1338
+ ...diffImage ? { diffImage } : {},
1339
+ usage: processUsage("compare", response.usage, response.durationSeconds, resolvedConfig)
1340
+ };
1341
+ });
1186
1342
  },
1187
1343
  elementsVisible(image, elements, options) {
1188
1344
  return checkElementsVisibility(image, elements, true, options);
@@ -1191,57 +1347,65 @@ function visualAI(config = {}) {
1191
1347
  return checkElementsVisibility(image, elements, false, options);
1192
1348
  },
1193
1349
  async accessibility(image, options) {
1194
- const img = await normalizeImage(image);
1195
- const prompt = buildAccessibilityPrompt(options);
1196
- debugLog(resolvedConfig, "accessibility prompt", prompt);
1197
- const response = await timedSendMessage(driver, [img], prompt);
1198
- debugLog(resolvedConfig, "accessibility response", response.text);
1199
- const result = parseCheckResponse(response.text);
1200
- return {
1201
- ...result,
1202
- usage: processUsage(
1203
- "accessibility",
1204
- response.usage,
1205
- response.durationSeconds,
1206
- resolvedConfig
1207
- )
1208
- };
1350
+ return withErrorDebug(resolvedConfig, "accessibility", async () => {
1351
+ const img = await normalizeImage(image);
1352
+ const prompt = buildAccessibilityPrompt(options);
1353
+ debugLog(resolvedConfig, "accessibility prompt", prompt, "prompt");
1354
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
1355
+ debugLog(resolvedConfig, "accessibility response", response.text, "response");
1356
+ const result = parseCheckResponse(response.text);
1357
+ return {
1358
+ ...result,
1359
+ usage: processUsage(
1360
+ "accessibility",
1361
+ response.usage,
1362
+ response.durationSeconds,
1363
+ resolvedConfig
1364
+ )
1365
+ };
1366
+ });
1209
1367
  },
1210
1368
  async layout(image, options) {
1211
- const img = await normalizeImage(image);
1212
- const prompt = buildLayoutPrompt(options);
1213
- debugLog(resolvedConfig, "layout prompt", prompt);
1214
- const response = await timedSendMessage(driver, [img], prompt);
1215
- debugLog(resolvedConfig, "layout response", response.text);
1216
- const result = parseCheckResponse(response.text);
1217
- return {
1218
- ...result,
1219
- usage: processUsage("layout", response.usage, response.durationSeconds, resolvedConfig)
1220
- };
1369
+ return withErrorDebug(resolvedConfig, "layout", async () => {
1370
+ const img = await normalizeImage(image);
1371
+ const prompt = buildLayoutPrompt(options);
1372
+ debugLog(resolvedConfig, "layout prompt", prompt, "prompt");
1373
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
1374
+ debugLog(resolvedConfig, "layout response", response.text, "response");
1375
+ const result = parseCheckResponse(response.text);
1376
+ return {
1377
+ ...result,
1378
+ usage: processUsage("layout", response.usage, response.durationSeconds, resolvedConfig)
1379
+ };
1380
+ });
1221
1381
  },
1222
1382
  async pageLoad(image, options) {
1223
- const img = await normalizeImage(image);
1224
- const prompt = buildPageLoadPrompt(options);
1225
- debugLog(resolvedConfig, "pageLoad prompt", prompt);
1226
- const response = await timedSendMessage(driver, [img], prompt);
1227
- debugLog(resolvedConfig, "pageLoad response", response.text);
1228
- const result = parseCheckResponse(response.text);
1229
- return {
1230
- ...result,
1231
- usage: processUsage("pageLoad", response.usage, response.durationSeconds, resolvedConfig)
1232
- };
1383
+ return withErrorDebug(resolvedConfig, "pageLoad", async () => {
1384
+ const img = await normalizeImage(image);
1385
+ const prompt = buildPageLoadPrompt(options);
1386
+ debugLog(resolvedConfig, "pageLoad prompt", prompt, "prompt");
1387
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
1388
+ debugLog(resolvedConfig, "pageLoad response", response.text, "response");
1389
+ const result = parseCheckResponse(response.text);
1390
+ return {
1391
+ ...result,
1392
+ usage: processUsage("pageLoad", response.usage, response.durationSeconds, resolvedConfig)
1393
+ };
1394
+ });
1233
1395
  },
1234
1396
  async content(image, options) {
1235
- const img = await normalizeImage(image);
1236
- const prompt = buildContentPrompt(options);
1237
- debugLog(resolvedConfig, "content prompt", prompt);
1238
- const response = await timedSendMessage(driver, [img], prompt);
1239
- debugLog(resolvedConfig, "content response", response.text);
1240
- const result = parseCheckResponse(response.text);
1241
- return {
1242
- ...result,
1243
- usage: processUsage("content", response.usage, response.durationSeconds, resolvedConfig)
1244
- };
1397
+ return withErrorDebug(resolvedConfig, "content", async () => {
1398
+ const img = await normalizeImage(image);
1399
+ const prompt = buildContentPrompt(options);
1400
+ debugLog(resolvedConfig, "content prompt", prompt, "prompt");
1401
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
1402
+ debugLog(resolvedConfig, "content response", response.text, "response");
1403
+ const result = parseCheckResponse(response.text);
1404
+ return {
1405
+ ...result,
1406
+ usage: processUsage("content", response.usage, response.durationSeconds, resolvedConfig)
1407
+ };
1408
+ });
1245
1409
  }
1246
1410
  };
1247
1411
  }
@@ -1325,6 +1489,7 @@ export {
1325
1489
  VisualAIProviderError,
1326
1490
  VisualAIRateLimitError,
1327
1491
  VisualAIResponseParseError,
1492
+ VisualAITruncationError,
1328
1493
  assertVisualCompareResult,
1329
1494
  assertVisualResult,
1330
1495
  formatCheckResult,