visual-ai-assertions 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -387,14 +387,13 @@ The `VisualAIKnownError` union and `isVisualAIKnownError()` helper are useful wh
387
387
 
388
388
  ### Optional Configuration
389
389
 
390
- | Variable | Description |
391
- | ---------------------------- | ---------------------------------------------------------------------------------------------------------------------- |
392
- | `VISUAL_AI_MODEL` | Default model when `model` is not set in config. Overrides the provider's default model. |
393
- | `VISUAL_AI_DEBUG` | Enable error diagnostic logging to stderr. Does **not** enable prompt/response logging. Use `"true"` or `"1"`. |
394
- | `VISUAL_AI_DEBUG_PROMPT` | Enable prompt-only debug logging to stderr. Use `"true"` or `"1"`. |
395
- | `VISUAL_AI_DEBUG_RESPONSE` | Enable response-only debug logging to stderr. Use `"true"` or `"1"`. |
396
- | `VISUAL_AI_REASONING_EFFORT` | Default reasoning effort when `reasoningEffort` is not set in config. Use `"low"`, `"medium"`, `"high"`, or `"xhigh"`. |
397
- | `VISUAL_AI_TRACK_USAGE` | Enable usage tracking (token counts and cost) to stderr. Use `"true"` or `"1"`. |
390
+ | Variable | Description |
391
+ | -------------------------- | -------------------------------------------------------------------------------------------------------------- |
392
+ | `VISUAL_AI_MODEL` | Default model when `model` is not set in config. Overrides the provider's default model. |
393
+ | `VISUAL_AI_DEBUG` | Enable error diagnostic logging to stderr. Does **not** enable prompt/response logging. Use `"true"` or `"1"`. |
394
+ | `VISUAL_AI_DEBUG_PROMPT` | Enable prompt-only debug logging to stderr. Use `"true"` or `"1"`. |
395
+ | `VISUAL_AI_DEBUG_RESPONSE` | Enable response-only debug logging to stderr. Use `"true"` or `"1"`. |
396
+ | `VISUAL_AI_TRACK_USAGE` | Enable usage tracking (token counts and cost) to stderr. Use `"true"` or `"1"`. |
398
397
 
399
398
  ## Configuration
400
399
 
@@ -468,19 +467,22 @@ All listed models support image/vision input. Pass any model ID to the `model` c
468
467
 
469
468
  ### OpenAI
470
469
 
471
- | Model | Model ID | Input $/MTok | Output $/MTok | Notes |
472
- | ----------- | ------------- | ------------ | ------------- | ------------------------------ |
473
- | GPT-5.4 Pro | `gpt-5.4-pro` | $30 | $180 | Most capable, extended context |
474
- | GPT-5.4 | `gpt-5.4` | $2.50 | $15 | Best vision quality |
475
- | GPT-5.2 | `gpt-5.2` | $1.75 | $14 | Balanced quality and cost |
476
- | GPT-5 mini | `gpt-5-mini` | $0.25 | $2 | **Default** — fast and cheap |
470
+ | Model | Model ID | Input $/MTok | Output $/MTok | Notes |
471
+ | ------------ | -------------- | ------------ | ------------- | ------------------------------ |
472
+ | GPT-5.4 Pro | `gpt-5.4-pro` | $30 | $180 | Most capable, extended context |
473
+ | GPT-5.4 | `gpt-5.4` | $2.50 | $15 | Best vision quality |
474
+ | GPT-5.2 | `gpt-5.2` | $1.75 | $14 | Balanced quality and cost |
475
+ | GPT-5.4 mini | `gpt-5.4-mini` | $0.75 | $4.50 | Fast and affordable |
476
+ | GPT-5.4 nano | `gpt-5.4-nano` | $0.20 | $1.25 | Cheapest OpenAI option |
477
+ | GPT-5 mini | `gpt-5-mini` | $0.25 | $2 | **Default** — fast and cheap |
477
478
 
478
479
  ### Google
479
480
 
480
- | Model | Model ID | Input $/MTok | Output $/MTok | Notes |
481
- | -------------- | ------------------------ | ------------ | ------------- | --------------------------------- |
482
- | Gemini 3.1 Pro | `gemini-3.1-pro-preview` | $2 | $12 | Preview — most advanced reasoning |
483
- | Gemini 3 Flash | `gemini-3-flash-preview` | $0.50 | $3 | **Default** — fast and capable |
481
+ | Model | Model ID | Input $/MTok | Output $/MTok | Notes |
482
+ | --------------------- | ------------------------------- | ------------ | ------------- | --------------------------------- |
483
+ | Gemini 3.1 Pro | `gemini-3.1-pro-preview` | $2 | $12 | Preview — most advanced reasoning |
484
+ | Gemini 3.1 Flash Lite | `gemini-3.1-flash-lite-preview` | $0.25 | $1.50 | Preview — lightweight and cheap |
485
+ | Gemini 3 Flash | `gemini-3-flash-preview` | $0.50 | $3 | **Default** — fast and capable |
484
486
 
485
487
  ## License
486
488
 
package/dist/index.cjs CHANGED
@@ -44,6 +44,7 @@ __export(index_exports, {
44
44
  Layout: () => Layout,
45
45
  Model: () => Model,
46
46
  Provider: () => Provider,
47
+ ReasoningEffort: () => ReasoningEffort,
47
48
  StatementResultSchema: () => StatementResultSchema,
48
49
  UsageInfoSchema: () => UsageInfoSchema,
49
50
  VisualAIAssertionError: () => VisualAIAssertionError,
@@ -54,6 +55,7 @@ __export(index_exports, {
54
55
  VisualAIProviderError: () => VisualAIProviderError,
55
56
  VisualAIRateLimitError: () => VisualAIRateLimitError,
56
57
  VisualAIResponseParseError: () => VisualAIResponseParseError,
58
+ VisualAITruncationError: () => VisualAITruncationError,
57
59
  assertVisualCompareResult: () => assertVisualCompareResult,
58
60
  assertVisualResult: () => assertVisualResult,
59
61
  formatCheckResult: () => formatCheckResult,
@@ -64,6 +66,12 @@ __export(index_exports, {
64
66
  module.exports = __toCommonJS(index_exports);
65
67
 
66
68
  // src/constants.ts
69
+ var ReasoningEffort = {
70
+ LOW: "low",
71
+ MEDIUM: "medium",
72
+ HIGH: "high",
73
+ XHIGH: "xhigh"
74
+ };
67
75
  var Provider = {
68
76
  ANTHROPIC: "anthropic",
69
77
  OPENAI: "openai",
@@ -85,6 +93,7 @@ var Model = {
85
93
  },
86
94
  Google: {
87
95
  GEMINI_3_1_PRO_PREVIEW: "gemini-3.1-pro-preview",
96
+ GEMINI_3_1_FLASH_LITE_PREVIEW: "gemini-3.1-flash-lite-preview",
88
97
  GEMINI_3_FLASH_PREVIEW: "gemini-3-flash-preview"
89
98
  }
90
99
  };
@@ -94,6 +103,7 @@ var DEFAULT_MODELS = {
94
103
  [Provider.GOOGLE]: Model.Google.GEMINI_3_FLASH_PREVIEW
95
104
  };
96
105
  var DEFAULT_MAX_TOKENS = 4096;
106
+ var OPENAI_REASONING_MAX_TOKENS = 16384;
97
107
  var MODEL_TO_PROVIDER = new Map([
98
108
  ...Object.values(Model.Anthropic).map((m) => [m, Provider.ANTHROPIC]),
99
109
  ...Object.values(Model.OpenAI).map((m) => [m, Provider.OPENAI]),
@@ -178,6 +188,16 @@ var VisualAIResponseParseError = class extends VisualAIError {
178
188
  this.rawResponse = rawResponse;
179
189
  }
180
190
  };
191
+ var VisualAITruncationError = class extends VisualAIError {
192
+ partialResponse;
193
+ maxTokens;
194
+ constructor(message, partialResponse, maxTokens) {
195
+ super(message, "RESPONSE_TRUNCATED");
196
+ this.name = "VisualAITruncationError";
197
+ this.partialResponse = partialResponse;
198
+ this.maxTokens = maxTokens;
199
+ }
200
+ };
181
201
  var VisualAIConfigError = class extends VisualAIError {
182
202
  constructor(message) {
183
203
  super(message, "CONFIG_INVALID");
@@ -193,7 +213,7 @@ var VisualAIAssertionError = class extends VisualAIError {
193
213
  }
194
214
  };
195
215
  function isVisualAIKnownError(error) {
196
- return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
216
+ return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAITruncationError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
197
217
  }
198
218
 
199
219
  // src/core/prompt.ts
@@ -207,12 +227,18 @@ Each issue must have:
207
227
  - "description": what the issue is
208
228
  - "suggestion": how to fix or improve it
209
229
  `;
210
- var CHECK_OUTPUT_SCHEMA = `Respond with a JSON object matching this exact structure:
230
+ var CHECK_OUTPUT_SCHEMA = `IMPORTANT: Follow this evaluation order:
231
+ 1. First, evaluate EACH statement independently and populate the "statements" array
232
+ 2. Then, set "pass" to true ONLY if every statement passed (logical AND of all statement results)
233
+ 3. Write "reasoning" as a brief overall summary of the evaluation
234
+ 4. Include "issues" only for statements that failed
235
+
236
+ Respond with a JSON object matching this exact structure:
211
237
  {
212
- "pass": boolean, // true ONLY if ALL statements are true
213
- "reasoning": string, // brief overall summary (e.g. "3 of 4 checks passed...")
214
- "issues": [...], // list of issues found (empty if all pass)
215
- "statements": [ // one entry per statement, in order
238
+ "pass": boolean, // true ONLY if ALL statements passed \u2014 derive from statements array
239
+ "reasoning": string, // brief overall summary of the evaluation
240
+ "issues": [...], // one issue per failing statement (empty if all pass)
241
+ "statements": [ // one entry per statement, in order \u2014 evaluate these FIRST
216
242
  {
217
243
  "statement": string, // the original statement text
218
244
  "pass": boolean, // whether this statement is true
@@ -231,7 +257,7 @@ Only include issues for statements that fail. If all statements pass, issues sho
231
257
  Example for a failing check:
232
258
  {
233
259
  "pass": false,
234
- "reasoning": "1 of 2 checks failed. The submit button is not visible.",
260
+ "reasoning": "The submit button is not visible on the page.",
235
261
  "issues": [
236
262
  { "priority": "major", "category": "missing-element", "description": "Submit button is not visible on the page", "suggestion": "Verify the submit button component is rendered and not hidden by CSS" }
237
263
  ],
@@ -491,7 +517,7 @@ var AnthropicDriver = class {
491
517
  this.client = new Anthropic({ apiKey });
492
518
  return this.client;
493
519
  }
494
- async sendMessage(images, prompt) {
520
+ async sendMessage(images, prompt, _options) {
495
521
  const client = await this.getClient();
496
522
  const imageBlocks = images.map((img) => ({
497
523
  type: "image",
@@ -521,6 +547,13 @@ var AnthropicDriver = class {
521
547
  const message = await client.messages.create(requestParams);
522
548
  const textBlock = message.content.find((block) => block.type === "text");
523
549
  const text = textBlock?.text ?? "";
550
+ if (message.stop_reason === "max_tokens") {
551
+ throw new VisualAITruncationError(
552
+ `Response truncated: Anthropic stopped due to max_tokens limit (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
553
+ text,
554
+ this.maxTokens
555
+ );
556
+ }
524
557
  return {
525
558
  text,
526
559
  usage: {
@@ -529,6 +562,7 @@ var AnthropicDriver = class {
529
562
  }
530
563
  };
531
564
  } catch (err) {
565
+ if (err instanceof VisualAITruncationError) throw err;
532
566
  throw mapProviderError(err);
533
567
  }
534
568
  }
@@ -540,11 +574,11 @@ function needsCodeExecution(model) {
540
574
  const match = model.match(/^gemini-(\d+)/);
541
575
  return match !== null && match[1] !== void 0 && parseInt(match[1], 10) >= 3;
542
576
  }
543
- var GOOGLE_THINKING_BUDGET = {
544
- low: 1024,
545
- medium: 8192,
546
- high: 24576,
547
- xhigh: 24576
577
+ var GOOGLE_THINKING_LEVEL = {
578
+ low: "minimal",
579
+ medium: "low",
580
+ high: "medium",
581
+ xhigh: "high"
548
582
  };
549
583
  var GoogleDriver = class {
550
584
  client;
@@ -584,7 +618,7 @@ var GoogleDriver = class {
584
618
  this.client = new GoogleGenAI({ apiKey });
585
619
  return this.client;
586
620
  }
587
- async sendMessage(images, prompt) {
621
+ async sendMessage(images, prompt, _options) {
588
622
  const client = await this.getClient();
589
623
  try {
590
624
  const response = await client.models.generateContent({
@@ -595,20 +629,36 @@ var GoogleDriver = class {
595
629
  maxOutputTokens: this.maxTokens,
596
630
  ...this.reasoningEffort && {
597
631
  thinkingConfig: {
598
- thinkingBudget: GOOGLE_THINKING_BUDGET[this.reasoningEffort]
632
+ thinkingLevel: GOOGLE_THINKING_LEVEL[this.reasoningEffort]
599
633
  }
600
634
  }
601
635
  }
602
636
  });
637
+ const finishReason = response.candidates?.[0]?.finishReason;
638
+ if (finishReason === "MAX_TOKENS") {
639
+ throw new VisualAITruncationError(
640
+ `Response truncated: Google returned finishReason "MAX_TOKENS". The model exhausted the output token budget (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
641
+ response.text ?? "",
642
+ this.maxTokens
643
+ );
644
+ }
645
+ if (finishReason && finishReason !== "STOP") {
646
+ throw new VisualAIProviderError(
647
+ `Response blocked: Google returned finishReason "${finishReason}".`
648
+ );
649
+ }
603
650
  const text = response.text ?? "";
651
+ const thoughtsTokenCount = response.usageMetadata?.thoughtsTokenCount;
604
652
  return {
605
653
  text,
606
654
  usage: response.usageMetadata ? {
607
655
  inputTokens: response.usageMetadata.promptTokenCount ?? 0,
608
- outputTokens: response.usageMetadata.candidatesTokenCount ?? 0
656
+ outputTokens: response.usageMetadata.candidatesTokenCount ?? 0,
657
+ ...thoughtsTokenCount !== void 0 && { reasoningTokens: thoughtsTokenCount }
609
658
  } : void 0
610
659
  };
611
660
  } catch (err) {
661
+ if (err instanceof VisualAITruncationError || err instanceof VisualAIProviderError) throw err;
612
662
  throw mapProviderError(err);
613
663
  }
614
664
  }
@@ -680,17 +730,25 @@ var OpenAIDriver = class {
680
730
  this.client = new OpenAI({ apiKey });
681
731
  return this.client;
682
732
  }
683
- async sendMessage(images, prompt) {
733
+ async sendMessage(images, prompt, options) {
684
734
  const client = await this.getClient();
685
735
  const imageBlocks = images.map((img) => ({
686
736
  type: "input_image",
687
737
  image_url: `data:${img.mimeType};base64,${img.base64}`
688
738
  }));
689
739
  try {
740
+ const format = options?.responseSchema ? {
741
+ type: "json_schema",
742
+ json_schema: {
743
+ name: "visual_ai_response",
744
+ strict: true,
745
+ schema: options.responseSchema
746
+ }
747
+ } : { type: "json_object", name: "visual_ai_response" };
690
748
  const requestParams = {
691
749
  model: this.model,
692
750
  max_output_tokens: this.maxTokens,
693
- text: { format: { type: "json_object" } },
751
+ text: { format },
694
752
  input: [
695
753
  {
696
754
  role: "user",
@@ -702,15 +760,26 @@ var OpenAIDriver = class {
702
760
  requestParams.reasoning = { effort: this.reasoningEffort };
703
761
  }
704
762
  const response = await client.responses.create(requestParams);
763
+ if (response.status && response.status !== "completed") {
764
+ const detail = response.incomplete_details?.reason ? ` (${response.incomplete_details.reason})` : "";
765
+ throw new VisualAITruncationError(
766
+ `Response truncated: OpenAI returned status "${response.status}"${detail}. The model exhausted the output token budget (${this.maxTokens} tokens). This commonly happens with higher reasoning effort levels. Increase maxTokens in your config (e.g., maxTokens: 16384) or lower reasoningEffort.`,
767
+ response.output_text ?? "",
768
+ this.maxTokens
769
+ );
770
+ }
705
771
  const text = response.output_text ?? "";
772
+ const reasoningTokens = response.usage?.output_tokens_details?.reasoning_tokens;
706
773
  return {
707
774
  text,
708
775
  usage: response.usage ? {
709
776
  inputTokens: response.usage.input_tokens,
710
- outputTokens: response.usage.output_tokens
777
+ outputTokens: response.usage.output_tokens,
778
+ ...reasoningTokens !== void 0 && { reasoningTokens }
711
779
  } : void 0
712
780
  };
713
781
  } catch (err) {
782
+ if (err instanceof VisualAITruncationError) throw err;
714
783
  throw mapProviderError(err);
715
784
  }
716
785
  }
@@ -757,15 +826,6 @@ function parseBooleanEnv(envName, value) {
757
826
  `Invalid ${envName} value: "${value}". Use "true", "1", "false", or "0".`
758
827
  );
759
828
  }
760
- var VALID_REASONING_EFFORTS = ["low", "medium", "high", "xhigh"];
761
- function parseReasoningEffortEnv(envName, value) {
762
- if (value === void 0 || value === "") return void 0;
763
- const lower = value.toLowerCase();
764
- if (VALID_REASONING_EFFORTS.includes(lower)) return lower;
765
- throw new VisualAIConfigError(
766
- `Invalid ${envName} value: "${value}". Use "low", "medium", "high", or "xhigh".`
767
- );
768
- }
769
829
  var debugDeprecationWarned = false;
770
830
  function resolveConfig(config) {
771
831
  const provider = resolveProvider(config);
@@ -780,12 +840,23 @@ function resolveConfig(config) {
780
840
  `
781
841
  );
782
842
  }
843
+ const userSetMaxTokens = config.maxTokens !== void 0;
844
+ let maxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
845
+ if (!userSetMaxTokens && provider === "openai" && (config.reasoningEffort === "high" || config.reasoningEffort === "xhigh")) {
846
+ maxTokens = OPENAI_REASONING_MAX_TOKENS;
847
+ if (debug) {
848
+ process.stderr.write(
849
+ `[visual-ai-assertions] Auto-increased maxTokens from ${DEFAULT_MAX_TOKENS} to ${OPENAI_REASONING_MAX_TOKENS} for OpenAI with reasoningEffort "${config.reasoningEffort}".
850
+ `
851
+ );
852
+ }
853
+ }
783
854
  return {
784
855
  provider,
785
856
  apiKey: config.apiKey,
786
857
  model,
787
- maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
788
- reasoningEffort: config.reasoningEffort ?? parseReasoningEffortEnv("VISUAL_AI_REASONING_EFFORT", process.env.VISUAL_AI_REASONING_EFFORT),
858
+ maxTokens,
859
+ reasoningEffort: config.reasoningEffort,
789
860
  debug,
790
861
  debugPrompt,
791
862
  debugResponse,
@@ -836,6 +907,10 @@ var PRICING_TABLE = {
836
907
  inputPricePerToken: 2 / PER_MILLION,
837
908
  outputPricePerToken: 12 / PER_MILLION
838
909
  },
910
+ [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_1_FLASH_LITE_PREVIEW}`]: {
911
+ inputPricePerToken: 0.25 / PER_MILLION,
912
+ outputPricePerToken: 1.5 / PER_MILLION
913
+ },
839
914
  [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_FLASH_PREVIEW}`]: {
840
915
  inputPricePerToken: 0.5 / PER_MILLION,
841
916
  outputPricePerToken: 3 / PER_MILLION
@@ -860,8 +935,9 @@ function usageLog(config, method, usage) {
860
935
  if (!config.trackUsage) return;
861
936
  const costStr = usage.estimatedCost !== void 0 ? `$${usage.estimatedCost.toFixed(6)}` : "unknown";
862
937
  const reasoningStr = config.reasoningEffort ? `reasoning: ${config.reasoningEffort}` : `reasoning: ${PROVIDER_DEFAULT_REASONING[config.provider]} (provider default)`;
938
+ const reasoningTokenStr = usage.reasoningTokens !== void 0 ? ` (${usage.reasoningTokens} reasoning)` : "";
863
939
  process.stderr.write(
864
- `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
940
+ `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output${reasoningTokenStr} tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
865
941
  `
866
942
  );
867
943
  }
@@ -871,6 +947,7 @@ function processUsage(method, rawUsage, durationSeconds, config) {
871
947
  const usage = {
872
948
  inputTokens,
873
949
  outputTokens,
950
+ ...rawUsage?.reasoningTokens !== void 0 && { reasoningTokens: rawUsage.reasoningTokens },
874
951
  estimatedCost: calculateCost(config.provider, config.model, inputTokens, outputTokens),
875
952
  durationSeconds
876
953
  };
@@ -879,6 +956,10 @@ function processUsage(method, rawUsage, durationSeconds, config) {
879
956
  }
880
957
  var MAX_RAW_RESPONSE_PREVIEW = 500;
881
958
  function formatError(error) {
959
+ if (error instanceof VisualAITruncationError) {
960
+ const preview = error.partialResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.partialResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.partialResponse;
961
+ return `${error.name} (${error.code}): ${error.message}. Partial response: ${preview}`;
962
+ }
882
963
  if (error instanceof VisualAIResponseParseError) {
883
964
  const truncated = error.rawResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.rawResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.rawResponse;
884
965
  return `${error.name} (${error.code}): ${error.message}. Raw (truncated): ${truncated}`;
@@ -899,9 +980,9 @@ async function withErrorDebug(config, method, fn) {
899
980
  throw error;
900
981
  }
901
982
  }
902
- async function timedSendMessage(driver, images, prompt) {
983
+ async function timedSendMessage(driver, images, prompt, options) {
903
984
  const start = performance.now();
904
- const response = await driver.sendMessage(images, prompt);
985
+ const response = await driver.sendMessage(images, prompt, options);
905
986
  const durationSeconds = (performance.now() - start) / 1e3;
906
987
  return { ...response, durationSeconds };
907
988
  }
@@ -1141,6 +1222,8 @@ var StatementResultSchema = import_zod.z.object({
1141
1222
  var UsageInfoSchema = import_zod.z.object({
1142
1223
  inputTokens: import_zod.z.number(),
1143
1224
  outputTokens: import_zod.z.number(),
1225
+ /** Reasoning/thinking tokens consumed by the model (informational, typically included within outputTokens). */
1226
+ reasoningTokens: import_zod.z.number().optional(),
1144
1227
  estimatedCost: import_zod.z.number().optional(),
1145
1228
  durationSeconds: import_zod.z.number().nonnegative().optional()
1146
1229
  });
@@ -1194,8 +1277,24 @@ function parseResponse(raw, schema) {
1194
1277
  }
1195
1278
  return result.data;
1196
1279
  }
1280
+ function reconcileCheckResult(result) {
1281
+ if (result.statements.length === 0) {
1282
+ return result;
1283
+ }
1284
+ const passCount = result.statements.filter((s) => s.pass).length;
1285
+ const total = result.statements.length;
1286
+ const computedPass = passCount === total;
1287
+ const countPrefix = `${passCount} of ${total} checks passed`;
1288
+ const reasoning = `${countPrefix}. ${result.reasoning}`;
1289
+ return {
1290
+ ...result,
1291
+ pass: computedPass,
1292
+ reasoning
1293
+ };
1294
+ }
1197
1295
  function parseCheckResponse(raw) {
1198
- return parseResponse(raw, CheckResponseSchema);
1296
+ const result = parseResponse(raw, CheckResponseSchema);
1297
+ return reconcileCheckResult(result);
1199
1298
  }
1200
1299
  function parseAskResponse(raw) {
1201
1300
  return parseResponse(raw, AskResponseSchema);
@@ -1205,6 +1304,12 @@ function parseCompareResponse(raw) {
1205
1304
  }
1206
1305
 
1207
1306
  // src/core/client.ts
1307
+ var import_zod_to_json_schema = require("zod-to-json-schema");
1308
+ function toSchemaOptions(schema) {
1309
+ return {
1310
+ responseSchema: (0, import_zod_to_json_schema.zodToJsonSchema)(schema, { target: "openAi" })
1311
+ };
1312
+ }
1208
1313
  var PROVIDER_REGISTRY = {
1209
1314
  anthropic: (config) => new AnthropicDriver(config),
1210
1315
  openai: (config) => new OpenAIDriver(config),
@@ -1213,6 +1318,9 @@ var PROVIDER_REGISTRY = {
1213
1318
  function createDriver(provider, config) {
1214
1319
  return PROVIDER_REGISTRY[provider](config);
1215
1320
  }
1321
+ var checkSchemaOptions = toSchemaOptions(CheckResponseSchema);
1322
+ var askSchemaOptions = toSchemaOptions(AskResponseSchema);
1323
+ var compareSchemaOptions = toSchemaOptions(CompareResponseSchema);
1216
1324
  function visualAI(config = {}) {
1217
1325
  const resolvedConfig = resolveConfig(config);
1218
1326
  const driverConfig = {
@@ -1231,7 +1339,7 @@ function visualAI(config = {}) {
1231
1339
  const img = await normalizeImage(image);
1232
1340
  const prompt = buildElementsVisibilityPrompt(elements, visible, options);
1233
1341
  debugLog(resolvedConfig, `${methodName} prompt`, prompt, "prompt");
1234
- const response = await timedSendMessage(driver, [img], prompt);
1342
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
1235
1343
  debugLog(resolvedConfig, `${methodName} response`, response.text, "response");
1236
1344
  const result = parseCheckResponse(response.text);
1237
1345
  return {
@@ -1250,7 +1358,7 @@ function visualAI(config = {}) {
1250
1358
  const img = await normalizeImage(image);
1251
1359
  const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
1252
1360
  debugLog(resolvedConfig, "check prompt", prompt, "prompt");
1253
- const response = await timedSendMessage(driver, [img], prompt);
1361
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
1254
1362
  debugLog(resolvedConfig, "check response", response.text, "response");
1255
1363
  const result = parseCheckResponse(response.text);
1256
1364
  return {
@@ -1264,7 +1372,7 @@ function visualAI(config = {}) {
1264
1372
  const img = await normalizeImage(image);
1265
1373
  const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
1266
1374
  debugLog(resolvedConfig, "ask prompt", prompt, "prompt");
1267
- const response = await timedSendMessage(driver, [img], prompt);
1375
+ const response = await timedSendMessage(driver, [img], prompt, askSchemaOptions);
1268
1376
  debugLog(resolvedConfig, "ask response", response.text, "response");
1269
1377
  const result = parseAskResponse(response.text);
1270
1378
  return {
@@ -1281,7 +1389,7 @@ function visualAI(config = {}) {
1281
1389
  instructions: options?.instructions
1282
1390
  });
1283
1391
  debugLog(resolvedConfig, "compare prompt", prompt, "prompt");
1284
- const response = await timedSendMessage(driver, [imgA, imgB], prompt);
1392
+ const response = await timedSendMessage(driver, [imgA, imgB], prompt, compareSchemaOptions);
1285
1393
  debugLog(resolvedConfig, "compare response", response.text, "response");
1286
1394
  const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
1287
1395
  const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
@@ -1316,7 +1424,7 @@ function visualAI(config = {}) {
1316
1424
  const img = await normalizeImage(image);
1317
1425
  const prompt = buildAccessibilityPrompt(options);
1318
1426
  debugLog(resolvedConfig, "accessibility prompt", prompt, "prompt");
1319
- const response = await timedSendMessage(driver, [img], prompt);
1427
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
1320
1428
  debugLog(resolvedConfig, "accessibility response", response.text, "response");
1321
1429
  const result = parseCheckResponse(response.text);
1322
1430
  return {
@@ -1335,7 +1443,7 @@ function visualAI(config = {}) {
1335
1443
  const img = await normalizeImage(image);
1336
1444
  const prompt = buildLayoutPrompt(options);
1337
1445
  debugLog(resolvedConfig, "layout prompt", prompt, "prompt");
1338
- const response = await timedSendMessage(driver, [img], prompt);
1446
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
1339
1447
  debugLog(resolvedConfig, "layout response", response.text, "response");
1340
1448
  const result = parseCheckResponse(response.text);
1341
1449
  return {
@@ -1349,7 +1457,7 @@ function visualAI(config = {}) {
1349
1457
  const img = await normalizeImage(image);
1350
1458
  const prompt = buildPageLoadPrompt(options);
1351
1459
  debugLog(resolvedConfig, "pageLoad prompt", prompt, "prompt");
1352
- const response = await timedSendMessage(driver, [img], prompt);
1460
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
1353
1461
  debugLog(resolvedConfig, "pageLoad response", response.text, "response");
1354
1462
  const result = parseCheckResponse(response.text);
1355
1463
  return {
@@ -1363,7 +1471,7 @@ function visualAI(config = {}) {
1363
1471
  const img = await normalizeImage(image);
1364
1472
  const prompt = buildContentPrompt(options);
1365
1473
  debugLog(resolvedConfig, "content prompt", prompt, "prompt");
1366
- const response = await timedSendMessage(driver, [img], prompt);
1474
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
1367
1475
  debugLog(resolvedConfig, "content response", response.text, "response");
1368
1476
  const result = parseCheckResponse(response.text);
1369
1477
  return {
@@ -1445,6 +1553,7 @@ function assertVisualCompareResult(result, label) {
1445
1553
  Layout,
1446
1554
  Model,
1447
1555
  Provider,
1556
+ ReasoningEffort,
1448
1557
  StatementResultSchema,
1449
1558
  UsageInfoSchema,
1450
1559
  VisualAIAssertionError,
@@ -1455,6 +1564,7 @@ function assertVisualCompareResult(result, label) {
1455
1564
  VisualAIProviderError,
1456
1565
  VisualAIRateLimitError,
1457
1566
  VisualAIResponseParseError,
1567
+ VisualAITruncationError,
1458
1568
  assertVisualCompareResult,
1459
1569
  assertVisualResult,
1460
1570
  formatCheckResult,