visual-ai-assertions 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -387,14 +387,13 @@ The `VisualAIKnownError` union and `isVisualAIKnownError()` helper are useful wh

  ### Optional Configuration

- | Variable | Description |
- | ---------------------------- | ---------------------------------------------------------------------------------------------------------------------- |
- | `VISUAL_AI_MODEL` | Default model when `model` is not set in config. Overrides the provider's default model. |
- | `VISUAL_AI_DEBUG` | Enable error diagnostic logging to stderr. Does **not** enable prompt/response logging. Use `"true"` or `"1"`. |
- | `VISUAL_AI_DEBUG_PROMPT` | Enable prompt-only debug logging to stderr. Use `"true"` or `"1"`. |
- | `VISUAL_AI_DEBUG_RESPONSE` | Enable response-only debug logging to stderr. Use `"true"` or `"1"`. |
- | `VISUAL_AI_REASONING_EFFORT` | Default reasoning effort when `reasoningEffort` is not set in config. Use `"low"`, `"medium"`, `"high"`, or `"xhigh"`. |
- | `VISUAL_AI_TRACK_USAGE` | Enable usage tracking (token counts and cost) to stderr. Use `"true"` or `"1"`. |
+ | Variable | Description |
+ | -------------------------- | -------------------------------------------------------------------------------------------------------------- |
+ | `VISUAL_AI_MODEL` | Default model when `model` is not set in config. Overrides the provider's default model. |
+ | `VISUAL_AI_DEBUG` | Enable error diagnostic logging to stderr. Does **not** enable prompt/response logging. Use `"true"` or `"1"`. |
+ | `VISUAL_AI_DEBUG_PROMPT` | Enable prompt-only debug logging to stderr. Use `"true"` or `"1"`. |
+ | `VISUAL_AI_DEBUG_RESPONSE` | Enable response-only debug logging to stderr. Use `"true"` or `"1"`. |
+ | `VISUAL_AI_TRACK_USAGE` | Enable usage tracking (token counts and cost) to stderr. Use `"true"` or `"1"`. |

  ## Configuration

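The table documents plain string env vars; a minimal sketch of enabling the boolean ones from a Node test setup file (the setup file itself is an assumption, not part of the package):

```ts
// Hypothetical test-setup sketch; variable names come from the table above.
// "true" and "1" are equivalent.
process.env.VISUAL_AI_DEBUG = "1";           // error diagnostics to stderr
process.env.VISUAL_AI_DEBUG_PROMPT = "true"; // prompt-only logging
process.env.VISUAL_AI_TRACK_USAGE = "1";     // token counts and cost to stderr
// VISUAL_AI_REASONING_EFFORT is gone in 0.7.0; use the `reasoningEffort`
// config option instead (see the resolveConfig changes below).
```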
@@ -468,19 +467,22 @@ All listed models support image/vision input. Pass any model ID to the `model` c

  ### OpenAI

- | Model | Model ID | Input $/MTok | Output $/MTok | Notes |
- | ----------- | ------------- | ------------ | ------------- | ------------------------------ |
- | GPT-5.4 Pro | `gpt-5.4-pro` | $30 | $180 | Most capable, extended context |
- | GPT-5.4 | `gpt-5.4` | $2.50 | $15 | Best vision quality |
- | GPT-5.2 | `gpt-5.2` | $1.75 | $14 | Balanced quality and cost |
- | GPT-5 mini | `gpt-5-mini` | $0.25 | $2 | **Default** — fast and cheap |
+ | Model | Model ID | Input $/MTok | Output $/MTok | Notes |
+ | ------------ | -------------- | ------------ | ------------- | ------------------------------ |
+ | GPT-5.4 Pro | `gpt-5.4-pro` | $30 | $180 | Most capable, extended context |
+ | GPT-5.4 | `gpt-5.4` | $2.50 | $15 | Best vision quality |
+ | GPT-5.2 | `gpt-5.2` | $1.75 | $14 | Balanced quality and cost |
+ | GPT-5.4 mini | `gpt-5.4-mini` | $0.75 | $4.50 | Fast and affordable |
+ | GPT-5.4 nano | `gpt-5.4-nano` | $0.20 | $1.25 | Cheapest OpenAI option |
+ | GPT-5 mini | `gpt-5-mini` | $0.25 | $2 | **Default** — fast and cheap |

  ### Google

- | Model | Model ID | Input $/MTok | Output $/MTok | Notes |
- | -------------- | ------------------------ | ------------ | ------------- | --------------------------------- |
- | Gemini 3.1 Pro | `gemini-3.1-pro-preview` | $2 | $12 | Preview — most advanced reasoning |
- | Gemini 3 Flash | `gemini-3-flash-preview` | $0.50 | $3 | **Default** — fast and capable |
+ | Model | Model ID | Input $/MTok | Output $/MTok | Notes |
+ | --------------------- | ------------------------------- | ------------ | ------------- | --------------------------------- |
+ | Gemini 3.1 Pro | `gemini-3.1-pro-preview` | $2 | $12 | Preview — most advanced reasoning |
+ | Gemini 3.1 Flash Lite | `gemini-3.1-flash-lite-preview` | $0.25 | $1.50 | Preview — lightweight and cheap |
+ | Gemini 3 Flash | `gemini-3-flash-preview` | $0.50 | $3 | **Default** — fast and capable |

  ## License

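Both provider tables gain cheaper tiers in 0.7.0 (GPT-5.4 mini/nano, Gemini 3.1 Flash Lite). A minimal sketch of opting into one of the new IDs; the `provider` and `model` config keys are inferred from the README and the `resolveConfig` code below:

```ts
import { visualAI } from "visual-ai-assertions";

// Select the new budget Google model by the ID from the table above.
const ai = visualAI({
  provider: "google",
  model: "gemini-3.1-flash-lite-preview",
});
```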
package/dist/index.cjs CHANGED
@@ -54,6 +54,7 @@ __export(index_exports, {
  VisualAIProviderError: () => VisualAIProviderError,
  VisualAIRateLimitError: () => VisualAIRateLimitError,
  VisualAIResponseParseError: () => VisualAIResponseParseError,
+ VisualAITruncationError: () => VisualAITruncationError,
  assertVisualCompareResult: () => assertVisualCompareResult,
  assertVisualResult: () => assertVisualResult,
  formatCheckResult: () => formatCheckResult,
@@ -85,6 +86,7 @@ var Model = {
  },
  Google: {
  GEMINI_3_1_PRO_PREVIEW: "gemini-3.1-pro-preview",
+ GEMINI_3_1_FLASH_LITE_PREVIEW: "gemini-3.1-flash-lite-preview",
  GEMINI_3_FLASH_PREVIEW: "gemini-3-flash-preview"
  }
  };
@@ -94,6 +96,7 @@ var DEFAULT_MODELS = {
  [Provider.GOOGLE]: Model.Google.GEMINI_3_FLASH_PREVIEW
  };
  var DEFAULT_MAX_TOKENS = 4096;
+ var OPENAI_REASONING_MAX_TOKENS = 16384;
  var MODEL_TO_PROVIDER = new Map([
  ...Object.values(Model.Anthropic).map((m) => [m, Provider.ANTHROPIC]),
  ...Object.values(Model.OpenAI).map((m) => [m, Provider.OPENAI]),
@@ -178,6 +181,16 @@ var VisualAIResponseParseError = class extends VisualAIError {
  this.rawResponse = rawResponse;
  }
  };
+ var VisualAITruncationError = class extends VisualAIError {
+ partialResponse;
+ maxTokens;
+ constructor(message, partialResponse, maxTokens) {
+ super(message, "RESPONSE_TRUNCATED");
+ this.name = "VisualAITruncationError";
+ this.partialResponse = partialResponse;
+ this.maxTokens = maxTokens;
+ }
+ };
  var VisualAIConfigError = class extends VisualAIError {
  constructor(message) {
  super(message, "CONFIG_INVALID");
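The new `VisualAITruncationError` (code `RESPONSE_TRUNCATED`) carries the partial model output and the token limit that was hit. A hedged sketch of handling it; the `check` call shape and `screenshot` value are illustrative assumptions:

```ts
import { visualAI, VisualAITruncationError } from "visual-ai-assertions";

declare const screenshot: Buffer; // assumed: provided by your test harness

const ai = visualAI({ maxTokens: 4096 });
try {
  await ai.check(screenshot, ["The submit button is visible"]);
} catch (err) {
  if (err instanceof VisualAITruncationError) {
    // Inspect what the model emitted before hitting the cap, then
    // raise maxTokens or lower reasoningEffort and retry.
    console.error(`Truncated at ${err.maxTokens} tokens:`, err.partialResponse);
  } else {
    throw err;
  }
}
```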
@@ -193,7 +206,7 @@ var VisualAIAssertionError = class extends VisualAIError {
  }
  };
  function isVisualAIKnownError(error) {
- return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
+ return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAITruncationError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
  }

  // src/core/prompt.ts
@@ -207,12 +220,18 @@ Each issue must have:
  - "description": what the issue is
  - "suggestion": how to fix or improve it
  `;
- var CHECK_OUTPUT_SCHEMA = `Respond with a JSON object matching this exact structure:
+ var CHECK_OUTPUT_SCHEMA = `IMPORTANT: Follow this evaluation order:
+ 1. First, evaluate EACH statement independently and populate the "statements" array
+ 2. Then, set "pass" to true ONLY if every statement passed (logical AND of all statement results)
+ 3. Write "reasoning" as a brief overall summary of the evaluation
+ 4. Include "issues" only for statements that failed
+
+ Respond with a JSON object matching this exact structure:
  {
- "pass": boolean, // true ONLY if ALL statements are true
- "reasoning": string, // brief overall summary (e.g. "3 of 4 checks passed...")
- "issues": [...], // list of issues found (empty if all pass)
- "statements": [ // one entry per statement, in order
+ "pass": boolean, // true ONLY if ALL statements passed \u2014 derive from statements array
+ "reasoning": string, // brief overall summary of the evaluation
+ "issues": [...], // one issue per failing statement (empty if all pass)
+ "statements": [ // one entry per statement, in order \u2014 evaluate these FIRST
  {
  "statement": string, // the original statement text
  "pass": boolean, // whether this statement is true
@@ -231,7 +250,7 @@ Only include issues for statements that fail. If all statements pass, issues sho
  Example for a failing check:
  {
  "pass": false,
- "reasoning": "1 of 2 checks failed. The submit button is not visible.",
+ "reasoning": "The submit button is not visible on the page.",
  "issues": [
  { "priority": "major", "category": "missing-element", "description": "Submit button is not visible on the page", "suggestion": "Verify the submit button component is rendered and not hidden by CSS" }
  ],
@@ -491,7 +510,7 @@ var AnthropicDriver = class {
  this.client = new Anthropic({ apiKey });
  return this.client;
  }
- async sendMessage(images, prompt) {
+ async sendMessage(images, prompt, _options) {
  const client = await this.getClient();
  const imageBlocks = images.map((img) => ({
  type: "image",
@@ -521,6 +540,13 @@ var AnthropicDriver = class {
  const message = await client.messages.create(requestParams);
  const textBlock = message.content.find((block) => block.type === "text");
  const text = textBlock?.text ?? "";
+ if (message.stop_reason === "max_tokens") {
+ throw new VisualAITruncationError(
+ `Response truncated: Anthropic stopped due to max_tokens limit (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
+ text,
+ this.maxTokens
+ );
+ }
  return {
  text,
  usage: {
@@ -529,6 +555,7 @@ var AnthropicDriver = class {
  }
  };
  } catch (err) {
+ if (err instanceof VisualAITruncationError) throw err;
  throw mapProviderError(err);
  }
  }
@@ -540,11 +567,11 @@ function needsCodeExecution(model) {
  const match = model.match(/^gemini-(\d+)/);
  return match !== null && match[1] !== void 0 && parseInt(match[1], 10) >= 3;
  }
- var GOOGLE_THINKING_BUDGET = {
- low: 1024,
- medium: 8192,
- high: 24576,
- xhigh: 24576
+ var GOOGLE_THINKING_LEVEL = {
+ low: "minimal",
+ medium: "low",
+ high: "medium",
+ xhigh: "high"
  };
  var GoogleDriver = class {
  client;
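For Gemini 3, 0.7.0 replaces numeric `thinkingBudget` values with Google's named `thinkingLevel` scale; note that the library's effort names land one step below the same-named Google levels. A sketch of the mapping as defined above:

```ts
// Mirrors GOOGLE_THINKING_LEVEL: the four-step effort scale shifts down one
// step onto Gemini's named levels, so only "xhigh" reaches Gemini's "high".
type ReasoningEffort = "low" | "medium" | "high" | "xhigh";

const thinkingLevelFor: Record<ReasoningEffort, string> = {
  low: "minimal",
  medium: "low",
  high: "medium",
  xhigh: "high",
};
```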
@@ -584,7 +611,7 @@ var GoogleDriver = class {
  this.client = new GoogleGenAI({ apiKey });
  return this.client;
  }
- async sendMessage(images, prompt) {
+ async sendMessage(images, prompt, _options) {
  const client = await this.getClient();
  try {
  const response = await client.models.generateContent({
@@ -595,20 +622,36 @@ var GoogleDriver = class {
  maxOutputTokens: this.maxTokens,
  ...this.reasoningEffort && {
  thinkingConfig: {
- thinkingBudget: GOOGLE_THINKING_BUDGET[this.reasoningEffort]
+ thinkingLevel: GOOGLE_THINKING_LEVEL[this.reasoningEffort]
  }
  }
  }
  });
+ const finishReason = response.candidates?.[0]?.finishReason;
+ if (finishReason === "MAX_TOKENS") {
+ throw new VisualAITruncationError(
+ `Response truncated: Google returned finishReason "MAX_TOKENS". The model exhausted the output token budget (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
+ response.text ?? "",
+ this.maxTokens
+ );
+ }
+ if (finishReason && finishReason !== "STOP") {
+ throw new VisualAIProviderError(
+ `Response blocked: Google returned finishReason "${finishReason}".`
+ );
+ }
  const text = response.text ?? "";
+ const thoughtsTokenCount = response.usageMetadata?.thoughtsTokenCount;
  return {
  text,
  usage: response.usageMetadata ? {
  inputTokens: response.usageMetadata.promptTokenCount ?? 0,
- outputTokens: response.usageMetadata.candidatesTokenCount ?? 0
+ outputTokens: response.usageMetadata.candidatesTokenCount ?? 0,
+ ...thoughtsTokenCount !== void 0 && { reasoningTokens: thoughtsTokenCount }
  } : void 0
  };
  } catch (err) {
+ if (err instanceof VisualAITruncationError || err instanceof VisualAIProviderError) throw err;
  throw mapProviderError(err);
  }
  }
@@ -680,17 +723,25 @@ var OpenAIDriver = class {
  this.client = new OpenAI({ apiKey });
  return this.client;
  }
- async sendMessage(images, prompt) {
+ async sendMessage(images, prompt, options) {
  const client = await this.getClient();
  const imageBlocks = images.map((img) => ({
  type: "input_image",
  image_url: `data:${img.mimeType};base64,${img.base64}`
  }));
  try {
+ const format = options?.responseSchema ? {
+ type: "json_schema",
+ json_schema: {
+ name: "visual_ai_response",
+ strict: true,
+ schema: options.responseSchema
+ }
+ } : { type: "json_object" };
  const requestParams = {
  model: this.model,
  max_output_tokens: this.maxTokens,
- text: { format: { type: "json_object" } },
+ text: { format },
  input: [
  {
  role: "user",
@@ -702,15 +753,26 @@ var OpenAIDriver = class {
  requestParams.reasoning = { effort: this.reasoningEffort };
  }
  const response = await client.responses.create(requestParams);
+ if (response.status && response.status !== "completed") {
+ const detail = response.incomplete_details?.reason ? ` (${response.incomplete_details.reason})` : "";
+ throw new VisualAITruncationError(
+ `Response truncated: OpenAI returned status "${response.status}"${detail}. The model exhausted the output token budget (${this.maxTokens} tokens). This commonly happens with higher reasoning effort levels. Increase maxTokens in your config (e.g., maxTokens: 16384) or lower reasoningEffort.`,
+ response.output_text ?? "",
+ this.maxTokens
+ );
+ }
  const text = response.output_text ?? "";
+ const reasoningTokens = response.usage?.output_tokens_details?.reasoning_tokens;
  return {
  text,
  usage: response.usage ? {
  inputTokens: response.usage.input_tokens,
- outputTokens: response.usage.output_tokens
+ outputTokens: response.usage.output_tokens,
+ ...reasoningTokens !== void 0 && { reasoningTokens }
  } : void 0
  };
  } catch (err) {
+ if (err instanceof VisualAITruncationError) throw err;
  throw mapProviderError(err);
  }
  }
@@ -757,15 +819,6 @@ function parseBooleanEnv(envName, value) {
  `Invalid ${envName} value: "${value}". Use "true", "1", "false", or "0".`
  );
  }
- var VALID_REASONING_EFFORTS = ["low", "medium", "high", "xhigh"];
- function parseReasoningEffortEnv(envName, value) {
- if (value === void 0 || value === "") return void 0;
- const lower = value.toLowerCase();
- if (VALID_REASONING_EFFORTS.includes(lower)) return lower;
- throw new VisualAIConfigError(
- `Invalid ${envName} value: "${value}". Use "low", "medium", "high", or "xhigh".`
- );
- }
  var debugDeprecationWarned = false;
  function resolveConfig(config) {
  const provider = resolveProvider(config);
@@ -780,12 +833,23 @@ function resolveConfig(config) {
  `
  );
  }
+ const userSetMaxTokens = config.maxTokens !== void 0;
+ let maxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
+ if (!userSetMaxTokens && provider === "openai" && (config.reasoningEffort === "high" || config.reasoningEffort === "xhigh")) {
+ maxTokens = OPENAI_REASONING_MAX_TOKENS;
+ if (debug) {
+ process.stderr.write(
+ `[visual-ai-assertions] Auto-increased maxTokens from ${DEFAULT_MAX_TOKENS} to ${OPENAI_REASONING_MAX_TOKENS} for OpenAI with reasoningEffort "${config.reasoningEffort}".
+ `
+ );
+ }
+ }
  return {
  provider,
  apiKey: config.apiKey,
  model,
- maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
- reasoningEffort: config.reasoningEffort ?? parseReasoningEffortEnv("VISUAL_AI_REASONING_EFFORT", process.env.VISUAL_AI_REASONING_EFFORT),
+ maxTokens,
+ reasoningEffort: config.reasoningEffort,
  debug,
  debugPrompt,
  debugResponse,
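Two resolution changes land here: the `VISUAL_AI_REASONING_EFFORT` env fallback is removed, and for OpenAI with `high` or `xhigh` effort an unset `maxTokens` is auto-raised from 4096 to 16384. A sketch of the observable behavior, using the config keys read above:

```ts
import { visualAI } from "visual-ai-assertions";

// reasoningEffort is now config-only; the env var fallback no longer exists.
const ai = visualAI({
  provider: "openai",
  reasoningEffort: "high",
  // maxTokens omitted: resolves to 16384 rather than the 4096 default.
});

// An explicit maxTokens always wins over the auto-increase:
const capped = visualAI({
  provider: "openai",
  reasoningEffort: "xhigh",
  maxTokens: 8192, // kept as-is
});
```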
@@ -836,6 +900,10 @@ var PRICING_TABLE = {
  inputPricePerToken: 2 / PER_MILLION,
  outputPricePerToken: 12 / PER_MILLION
  },
+ [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_1_FLASH_LITE_PREVIEW}`]: {
+ inputPricePerToken: 0.25 / PER_MILLION,
+ outputPricePerToken: 1.5 / PER_MILLION
+ },
  [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_FLASH_PREVIEW}`]: {
  inputPricePerToken: 0.5 / PER_MILLION,
  outputPricePerToken: 3 / PER_MILLION
@@ -860,8 +928,9 @@ function usageLog(config, method, usage) {
  if (!config.trackUsage) return;
  const costStr = usage.estimatedCost !== void 0 ? `$${usage.estimatedCost.toFixed(6)}` : "unknown";
  const reasoningStr = config.reasoningEffort ? `reasoning: ${config.reasoningEffort}` : `reasoning: ${PROVIDER_DEFAULT_REASONING[config.provider]} (provider default)`;
+ const reasoningTokenStr = usage.reasoningTokens !== void 0 ? ` (${usage.reasoningTokens} reasoning)` : "";
  process.stderr.write(
- `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
+ `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output${reasoningTokenStr} tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
  `
  );
  }
@@ -871,6 +940,7 @@ function processUsage(method, rawUsage, durationSeconds, config) {
  const usage = {
  inputTokens,
  outputTokens,
+ ...rawUsage?.reasoningTokens !== void 0 && { reasoningTokens: rawUsage.reasoningTokens },
  estimatedCost: calculateCost(config.provider, config.model, inputTokens, outputTokens),
  durationSeconds
  };
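Usage objects can now carry a `reasoningTokens` count (sourced from OpenAI's `reasoning_tokens` and Google's `thoughtsTokenCount`). A sketch of reading it, assuming check results expose `usage` as the `UsageInfoSchema` below suggests:

```ts
// `ai` and `screenshot` as in the earlier sketches.
const result = await ai.check(screenshot, ["The page header is visible"]);

if (result.usage?.reasoningTokens !== undefined) {
  // Informational: reasoning tokens are typically already included in outputTokens.
  console.log(`spent ${result.usage.reasoningTokens} reasoning tokens`);
}
```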
@@ -879,6 +949,10 @@ function processUsage(method, rawUsage, durationSeconds, config) {
  }
  var MAX_RAW_RESPONSE_PREVIEW = 500;
  function formatError(error) {
+ if (error instanceof VisualAITruncationError) {
+ const preview = error.partialResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.partialResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.partialResponse;
+ return `${error.name} (${error.code}): ${error.message}. Partial response: ${preview}`;
+ }
  if (error instanceof VisualAIResponseParseError) {
  const truncated = error.rawResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.rawResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.rawResponse;
  return `${error.name} (${error.code}): ${error.message}. Raw (truncated): ${truncated}`;
@@ -899,9 +973,9 @@ async function withErrorDebug(config, method, fn) {
  throw error;
  }
  }
- async function timedSendMessage(driver, images, prompt) {
+ async function timedSendMessage(driver, images, prompt, options) {
  const start = performance.now();
- const response = await driver.sendMessage(images, prompt);
+ const response = await driver.sendMessage(images, prompt, options);
  const durationSeconds = (performance.now() - start) / 1e3;
  return { ...response, durationSeconds };
  }
@@ -1141,6 +1215,8 @@ var StatementResultSchema = import_zod.z.object({
  var UsageInfoSchema = import_zod.z.object({
  inputTokens: import_zod.z.number(),
  outputTokens: import_zod.z.number(),
+ /** Reasoning/thinking tokens consumed by the model (informational, typically included within outputTokens). */
+ reasoningTokens: import_zod.z.number().optional(),
  estimatedCost: import_zod.z.number().optional(),
  durationSeconds: import_zod.z.number().nonnegative().optional()
  });
@@ -1194,8 +1270,24 @@ function parseResponse(raw, schema) {
  }
  return result.data;
  }
+ function reconcileCheckResult(result) {
+ if (result.statements.length === 0) {
+ return result;
+ }
+ const passCount = result.statements.filter((s) => s.pass).length;
+ const total = result.statements.length;
+ const computedPass = passCount === total;
+ const countPrefix = `${passCount} of ${total} checks passed`;
+ const reasoning = `${countPrefix}. ${result.reasoning}`;
+ return {
+ ...result,
+ pass: computedPass,
+ reasoning
+ };
+ }
  function parseCheckResponse(raw) {
- return parseResponse(raw, CheckResponseSchema);
+ const result = parseResponse(raw, CheckResponseSchema);
+ return reconcileCheckResult(result);
  }
  function parseAskResponse(raw) {
  return parseResponse(raw, AskResponseSchema);
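`reconcileCheckResult` makes the per-statement results authoritative: the top-level `pass` is recomputed as the AND of all statement results, and `reasoning` gains a deterministic count prefix that the model was previously prompted to write itself. A worked example of the transformation:

```ts
// Input: a parsed response whose top-level verdict contradicts its statements.
const parsed = {
  pass: true, // model's (inconsistent) verdict
  reasoning: "Header renders correctly.",
  issues: [],
  statements: [
    { statement: "Header is visible", pass: true, reasoning: "..." },
    { statement: "Submit button is visible", pass: false, reasoning: "..." },
  ],
};

// After reconcileCheckResult:
//   pass      -> false  (only 1 of 2 statements passed)
//   reasoning -> "1 of 2 checks passed. Header renders correctly."
```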
@@ -1205,6 +1297,12 @@ function parseCompareResponse(raw) {
  }

  // src/core/client.ts
+ var import_zod_to_json_schema = require("zod-to-json-schema");
+ function toSchemaOptions(schema) {
+ return {
+ responseSchema: (0, import_zod_to_json_schema.zodToJsonSchema)(schema, { target: "openAi" })
+ };
+ }
  var PROVIDER_REGISTRY = {
  anthropic: (config) => new AnthropicDriver(config),
  openai: (config) => new OpenAIDriver(config),
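The client now converts its Zod response schemas to OpenAI-flavored JSON Schema with `zod-to-json-schema` and threads them to drivers as `responseSchema` (only the OpenAI driver consumes it, via strict `json_schema` output; the Anthropic and Google drivers ignore their `_options` parameter). A standalone sketch of the conversion:

```ts
import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";

// Illustrative stand-in for CheckResponseSchema; the real schema has more fields.
const DemoSchema = z.object({
  pass: z.boolean(),
  reasoning: z.string(),
});

// target: "openAi" emits the JSON Schema dialect OpenAI structured outputs expect.
const responseSchema = zodToJsonSchema(DemoSchema, { target: "openAi" });
```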
@@ -1213,6 +1311,9 @@ var PROVIDER_REGISTRY = {
  function createDriver(provider, config) {
  return PROVIDER_REGISTRY[provider](config);
  }
+ var checkSchemaOptions = toSchemaOptions(CheckResponseSchema);
+ var askSchemaOptions = toSchemaOptions(AskResponseSchema);
+ var compareSchemaOptions = toSchemaOptions(CompareResponseSchema);
  function visualAI(config = {}) {
  const resolvedConfig = resolveConfig(config);
  const driverConfig = {
@@ -1231,7 +1332,7 @@ function visualAI(config = {}) {
  const img = await normalizeImage(image);
  const prompt = buildElementsVisibilityPrompt(elements, visible, options);
  debugLog(resolvedConfig, `${methodName} prompt`, prompt, "prompt");
- const response = await timedSendMessage(driver, [img], prompt);
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
  debugLog(resolvedConfig, `${methodName} response`, response.text, "response");
  const result = parseCheckResponse(response.text);
  return {
@@ -1250,7 +1351,7 @@ function visualAI(config = {}) {
  const img = await normalizeImage(image);
  const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
  debugLog(resolvedConfig, "check prompt", prompt, "prompt");
- const response = await timedSendMessage(driver, [img], prompt);
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
  debugLog(resolvedConfig, "check response", response.text, "response");
  const result = parseCheckResponse(response.text);
  return {
@@ -1264,7 +1365,7 @@ function visualAI(config = {}) {
  const img = await normalizeImage(image);
  const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
  debugLog(resolvedConfig, "ask prompt", prompt, "prompt");
- const response = await timedSendMessage(driver, [img], prompt);
+ const response = await timedSendMessage(driver, [img], prompt, askSchemaOptions);
  debugLog(resolvedConfig, "ask response", response.text, "response");
  const result = parseAskResponse(response.text);
  return {
@@ -1281,7 +1382,7 @@ function visualAI(config = {}) {
  instructions: options?.instructions
  });
  debugLog(resolvedConfig, "compare prompt", prompt, "prompt");
- const response = await timedSendMessage(driver, [imgA, imgB], prompt);
+ const response = await timedSendMessage(driver, [imgA, imgB], prompt, compareSchemaOptions);
  debugLog(resolvedConfig, "compare response", response.text, "response");
  const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
  const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
@@ -1316,7 +1417,7 @@ function visualAI(config = {}) {
  const img = await normalizeImage(image);
  const prompt = buildAccessibilityPrompt(options);
  debugLog(resolvedConfig, "accessibility prompt", prompt, "prompt");
- const response = await timedSendMessage(driver, [img], prompt);
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
  debugLog(resolvedConfig, "accessibility response", response.text, "response");
  const result = parseCheckResponse(response.text);
  return {
@@ -1335,7 +1436,7 @@ function visualAI(config = {}) {
  const img = await normalizeImage(image);
  const prompt = buildLayoutPrompt(options);
  debugLog(resolvedConfig, "layout prompt", prompt, "prompt");
- const response = await timedSendMessage(driver, [img], prompt);
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
  debugLog(resolvedConfig, "layout response", response.text, "response");
  const result = parseCheckResponse(response.text);
  return {
@@ -1349,7 +1450,7 @@ function visualAI(config = {}) {
  const img = await normalizeImage(image);
  const prompt = buildPageLoadPrompt(options);
  debugLog(resolvedConfig, "pageLoad prompt", prompt, "prompt");
- const response = await timedSendMessage(driver, [img], prompt);
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
  debugLog(resolvedConfig, "pageLoad response", response.text, "response");
  const result = parseCheckResponse(response.text);
  return {
@@ -1363,7 +1464,7 @@ function visualAI(config = {}) {
  const img = await normalizeImage(image);
  const prompt = buildContentPrompt(options);
  debugLog(resolvedConfig, "content prompt", prompt, "prompt");
- const response = await timedSendMessage(driver, [img], prompt);
+ const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
  debugLog(resolvedConfig, "content response", response.text, "response");
  const result = parseCheckResponse(response.text);
  return {
@@ -1455,6 +1556,7 @@ function assertVisualCompareResult(result, label) {
  VisualAIProviderError,
  VisualAIRateLimitError,
  VisualAIResponseParseError,
+ VisualAITruncationError,
  assertVisualCompareResult,
  assertVisualResult,
  formatCheckResult,