visual-ai-assertions 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -18
- package/dist/index.cjs +144 -42
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +43 -3
- package/dist/index.d.ts +43 -3
- package/dist/index.js +143 -42
- package/dist/index.js.map +1 -1
- package/package.json +3 -2
package/README.md
CHANGED
|
@@ -387,14 +387,13 @@ The `VisualAIKnownError` union and `isVisualAIKnownError()` helper are useful wh
|
|
|
387
387
|
|
|
388
388
|
### Optional Configuration
|
|
389
389
|
|
|
390
|
-
| Variable
|
|
391
|
-
|
|
|
392
|
-
| `VISUAL_AI_MODEL`
|
|
393
|
-
| `VISUAL_AI_DEBUG`
|
|
394
|
-
| `VISUAL_AI_DEBUG_PROMPT`
|
|
395
|
-
| `VISUAL_AI_DEBUG_RESPONSE`
|
|
396
|
-
| `
|
|
397
|
-
| `VISUAL_AI_TRACK_USAGE` | Enable usage tracking (token counts and cost) to stderr. Use `"true"` or `"1"`. |
|
|
390
|
+
| Variable | Description |
|
|
391
|
+
| -------------------------- | -------------------------------------------------------------------------------------------------------------- |
|
|
392
|
+
| `VISUAL_AI_MODEL` | Default model when `model` is not set in config. Overrides the provider's default model. |
|
|
393
|
+
| `VISUAL_AI_DEBUG` | Enable error diagnostic logging to stderr. Does **not** enable prompt/response logging. Use `"true"` or `"1"`. |
|
|
394
|
+
| `VISUAL_AI_DEBUG_PROMPT` | Enable prompt-only debug logging to stderr. Use `"true"` or `"1"`. |
|
|
395
|
+
| `VISUAL_AI_DEBUG_RESPONSE` | Enable response-only debug logging to stderr. Use `"true"` or `"1"`. |
|
|
396
|
+
| `VISUAL_AI_TRACK_USAGE` | Enable usage tracking (token counts and cost) to stderr. Use `"true"` or `"1"`. |
|
|
398
397
|
|
|
399
398
|
## Configuration
|
|
400
399
|
|
|
@@ -468,19 +467,22 @@ All listed models support image/vision input. Pass any model ID to the `model` c
|
|
|
468
467
|
|
|
469
468
|
### OpenAI
|
|
470
469
|
|
|
471
|
-
| Model
|
|
472
|
-
|
|
|
473
|
-
| GPT-5.4 Pro
|
|
474
|
-
| GPT-5.4
|
|
475
|
-
| GPT-5.2
|
|
476
|
-
| GPT-5 mini
|
|
470
|
+
| Model | Model ID | Input $/MTok | Output $/MTok | Notes |
|
|
471
|
+
| ------------ | -------------- | ------------ | ------------- | ------------------------------ |
|
|
472
|
+
| GPT-5.4 Pro | `gpt-5.4-pro` | $30 | $180 | Most capable, extended context |
|
|
473
|
+
| GPT-5.4 | `gpt-5.4` | $2.50 | $15 | Best vision quality |
|
|
474
|
+
| GPT-5.2 | `gpt-5.2` | $1.75 | $14 | Balanced quality and cost |
|
|
475
|
+
| GPT-5.4 mini | `gpt-5.4-mini` | $0.75 | $4.50 | Fast and affordable |
|
|
476
|
+
| GPT-5.4 nano | `gpt-5.4-nano` | $0.20 | $1.25 | Cheapest OpenAI option |
|
|
477
|
+
| GPT-5 mini | `gpt-5-mini` | $0.25 | $2 | **Default** — fast and cheap |
|
|
477
478
|
|
|
478
479
|
### Google
|
|
479
480
|
|
|
480
|
-
| Model
|
|
481
|
-
|
|
|
482
|
-
| Gemini 3.1 Pro
|
|
483
|
-
| Gemini 3 Flash | `gemini-3-flash-preview` | $0.
|
|
481
|
+
| Model | Model ID | Input $/MTok | Output $/MTok | Notes |
|
|
482
|
+
| --------------------- | ------------------------------- | ------------ | ------------- | --------------------------------- |
|
|
483
|
+
| Gemini 3.1 Pro | `gemini-3.1-pro-preview` | $2 | $12 | Preview — most advanced reasoning |
|
|
484
|
+
| Gemini 3.1 Flash Lite | `gemini-3.1-flash-lite-preview` | $0.25 | $1.50 | Preview — lightweight and cheap |
|
|
485
|
+
| Gemini 3 Flash | `gemini-3-flash-preview` | $0.50 | $3 | **Default** — fast and capable |
|
|
484
486
|
|
|
485
487
|
## License
|
|
486
488
|
|
package/dist/index.cjs
CHANGED
|
@@ -54,6 +54,7 @@ __export(index_exports, {
|
|
|
54
54
|
VisualAIProviderError: () => VisualAIProviderError,
|
|
55
55
|
VisualAIRateLimitError: () => VisualAIRateLimitError,
|
|
56
56
|
VisualAIResponseParseError: () => VisualAIResponseParseError,
|
|
57
|
+
VisualAITruncationError: () => VisualAITruncationError,
|
|
57
58
|
assertVisualCompareResult: () => assertVisualCompareResult,
|
|
58
59
|
assertVisualResult: () => assertVisualResult,
|
|
59
60
|
formatCheckResult: () => formatCheckResult,
|
|
@@ -85,6 +86,7 @@ var Model = {
|
|
|
85
86
|
},
|
|
86
87
|
Google: {
|
|
87
88
|
GEMINI_3_1_PRO_PREVIEW: "gemini-3.1-pro-preview",
|
|
89
|
+
GEMINI_3_1_FLASH_LITE_PREVIEW: "gemini-3.1-flash-lite-preview",
|
|
88
90
|
GEMINI_3_FLASH_PREVIEW: "gemini-3-flash-preview"
|
|
89
91
|
}
|
|
90
92
|
};
|
|
@@ -94,6 +96,7 @@ var DEFAULT_MODELS = {
|
|
|
94
96
|
[Provider.GOOGLE]: Model.Google.GEMINI_3_FLASH_PREVIEW
|
|
95
97
|
};
|
|
96
98
|
var DEFAULT_MAX_TOKENS = 4096;
|
|
99
|
+
var OPENAI_REASONING_MAX_TOKENS = 16384;
|
|
97
100
|
var MODEL_TO_PROVIDER = new Map([
|
|
98
101
|
...Object.values(Model.Anthropic).map((m) => [m, Provider.ANTHROPIC]),
|
|
99
102
|
...Object.values(Model.OpenAI).map((m) => [m, Provider.OPENAI]),
|
|
@@ -178,6 +181,16 @@ var VisualAIResponseParseError = class extends VisualAIError {
|
|
|
178
181
|
this.rawResponse = rawResponse;
|
|
179
182
|
}
|
|
180
183
|
};
|
|
184
|
+
var VisualAITruncationError = class extends VisualAIError {
|
|
185
|
+
partialResponse;
|
|
186
|
+
maxTokens;
|
|
187
|
+
constructor(message, partialResponse, maxTokens) {
|
|
188
|
+
super(message, "RESPONSE_TRUNCATED");
|
|
189
|
+
this.name = "VisualAITruncationError";
|
|
190
|
+
this.partialResponse = partialResponse;
|
|
191
|
+
this.maxTokens = maxTokens;
|
|
192
|
+
}
|
|
193
|
+
};
|
|
181
194
|
var VisualAIConfigError = class extends VisualAIError {
|
|
182
195
|
constructor(message) {
|
|
183
196
|
super(message, "CONFIG_INVALID");
|
|
@@ -193,7 +206,7 @@ var VisualAIAssertionError = class extends VisualAIError {
|
|
|
193
206
|
}
|
|
194
207
|
};
|
|
195
208
|
function isVisualAIKnownError(error) {
|
|
196
|
-
return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
|
|
209
|
+
return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAITruncationError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
|
|
197
210
|
}
|
|
198
211
|
|
|
199
212
|
// src/core/prompt.ts
|
|
@@ -207,12 +220,18 @@ Each issue must have:
|
|
|
207
220
|
- "description": what the issue is
|
|
208
221
|
- "suggestion": how to fix or improve it
|
|
209
222
|
`;
|
|
210
|
-
var CHECK_OUTPUT_SCHEMA = `
|
|
223
|
+
var CHECK_OUTPUT_SCHEMA = `IMPORTANT: Follow this evaluation order:
|
|
224
|
+
1. First, evaluate EACH statement independently and populate the "statements" array
|
|
225
|
+
2. Then, set "pass" to true ONLY if every statement passed (logical AND of all statement results)
|
|
226
|
+
3. Write "reasoning" as a brief overall summary of the evaluation
|
|
227
|
+
4. Include "issues" only for statements that failed
|
|
228
|
+
|
|
229
|
+
Respond with a JSON object matching this exact structure:
|
|
211
230
|
{
|
|
212
|
-
"pass": boolean, // true ONLY if ALL statements
|
|
213
|
-
"reasoning": string, // brief overall summary
|
|
214
|
-
"issues": [...], //
|
|
215
|
-
"statements": [ // one entry per statement, in order
|
|
231
|
+
"pass": boolean, // true ONLY if ALL statements passed \u2014 derive from statements array
|
|
232
|
+
"reasoning": string, // brief overall summary of the evaluation
|
|
233
|
+
"issues": [...], // one issue per failing statement (empty if all pass)
|
|
234
|
+
"statements": [ // one entry per statement, in order \u2014 evaluate these FIRST
|
|
216
235
|
{
|
|
217
236
|
"statement": string, // the original statement text
|
|
218
237
|
"pass": boolean, // whether this statement is true
|
|
@@ -231,7 +250,7 @@ Only include issues for statements that fail. If all statements pass, issues sho
|
|
|
231
250
|
Example for a failing check:
|
|
232
251
|
{
|
|
233
252
|
"pass": false,
|
|
234
|
-
"reasoning": "
|
|
253
|
+
"reasoning": "The submit button is not visible on the page.",
|
|
235
254
|
"issues": [
|
|
236
255
|
{ "priority": "major", "category": "missing-element", "description": "Submit button is not visible on the page", "suggestion": "Verify the submit button component is rendered and not hidden by CSS" }
|
|
237
256
|
],
|
|
@@ -491,7 +510,7 @@ var AnthropicDriver = class {
|
|
|
491
510
|
this.client = new Anthropic({ apiKey });
|
|
492
511
|
return this.client;
|
|
493
512
|
}
|
|
494
|
-
async sendMessage(images, prompt) {
|
|
513
|
+
async sendMessage(images, prompt, _options) {
|
|
495
514
|
const client = await this.getClient();
|
|
496
515
|
const imageBlocks = images.map((img) => ({
|
|
497
516
|
type: "image",
|
|
@@ -521,6 +540,13 @@ var AnthropicDriver = class {
|
|
|
521
540
|
const message = await client.messages.create(requestParams);
|
|
522
541
|
const textBlock = message.content.find((block) => block.type === "text");
|
|
523
542
|
const text = textBlock?.text ?? "";
|
|
543
|
+
if (message.stop_reason === "max_tokens") {
|
|
544
|
+
throw new VisualAITruncationError(
|
|
545
|
+
`Response truncated: Anthropic stopped due to max_tokens limit (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
|
|
546
|
+
text,
|
|
547
|
+
this.maxTokens
|
|
548
|
+
);
|
|
549
|
+
}
|
|
524
550
|
return {
|
|
525
551
|
text,
|
|
526
552
|
usage: {
|
|
@@ -529,6 +555,7 @@ var AnthropicDriver = class {
|
|
|
529
555
|
}
|
|
530
556
|
};
|
|
531
557
|
} catch (err) {
|
|
558
|
+
if (err instanceof VisualAITruncationError) throw err;
|
|
532
559
|
throw mapProviderError(err);
|
|
533
560
|
}
|
|
534
561
|
}
|
|
@@ -540,11 +567,11 @@ function needsCodeExecution(model) {
|
|
|
540
567
|
const match = model.match(/^gemini-(\d+)/);
|
|
541
568
|
return match !== null && match[1] !== void 0 && parseInt(match[1], 10) >= 3;
|
|
542
569
|
}
|
|
543
|
-
var
|
|
544
|
-
low:
|
|
545
|
-
medium:
|
|
546
|
-
high:
|
|
547
|
-
xhigh:
|
|
570
|
+
var GOOGLE_THINKING_LEVEL = {
|
|
571
|
+
low: "minimal",
|
|
572
|
+
medium: "low",
|
|
573
|
+
high: "medium",
|
|
574
|
+
xhigh: "high"
|
|
548
575
|
};
|
|
549
576
|
var GoogleDriver = class {
|
|
550
577
|
client;
|
|
@@ -584,7 +611,7 @@ var GoogleDriver = class {
|
|
|
584
611
|
this.client = new GoogleGenAI({ apiKey });
|
|
585
612
|
return this.client;
|
|
586
613
|
}
|
|
587
|
-
async sendMessage(images, prompt) {
|
|
614
|
+
async sendMessage(images, prompt, _options) {
|
|
588
615
|
const client = await this.getClient();
|
|
589
616
|
try {
|
|
590
617
|
const response = await client.models.generateContent({
|
|
@@ -595,20 +622,36 @@ var GoogleDriver = class {
|
|
|
595
622
|
maxOutputTokens: this.maxTokens,
|
|
596
623
|
...this.reasoningEffort && {
|
|
597
624
|
thinkingConfig: {
|
|
598
|
-
|
|
625
|
+
thinkingLevel: GOOGLE_THINKING_LEVEL[this.reasoningEffort]
|
|
599
626
|
}
|
|
600
627
|
}
|
|
601
628
|
}
|
|
602
629
|
});
|
|
630
|
+
const finishReason = response.candidates?.[0]?.finishReason;
|
|
631
|
+
if (finishReason === "MAX_TOKENS") {
|
|
632
|
+
throw new VisualAITruncationError(
|
|
633
|
+
`Response truncated: Google returned finishReason "MAX_TOKENS". The model exhausted the output token budget (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
|
|
634
|
+
response.text ?? "",
|
|
635
|
+
this.maxTokens
|
|
636
|
+
);
|
|
637
|
+
}
|
|
638
|
+
if (finishReason && finishReason !== "STOP") {
|
|
639
|
+
throw new VisualAIProviderError(
|
|
640
|
+
`Response blocked: Google returned finishReason "${finishReason}".`
|
|
641
|
+
);
|
|
642
|
+
}
|
|
603
643
|
const text = response.text ?? "";
|
|
644
|
+
const thoughtsTokenCount = response.usageMetadata?.thoughtsTokenCount;
|
|
604
645
|
return {
|
|
605
646
|
text,
|
|
606
647
|
usage: response.usageMetadata ? {
|
|
607
648
|
inputTokens: response.usageMetadata.promptTokenCount ?? 0,
|
|
608
|
-
outputTokens: response.usageMetadata.candidatesTokenCount ?? 0
|
|
649
|
+
outputTokens: response.usageMetadata.candidatesTokenCount ?? 0,
|
|
650
|
+
...thoughtsTokenCount !== void 0 && { reasoningTokens: thoughtsTokenCount }
|
|
609
651
|
} : void 0
|
|
610
652
|
};
|
|
611
653
|
} catch (err) {
|
|
654
|
+
if (err instanceof VisualAITruncationError || err instanceof VisualAIProviderError) throw err;
|
|
612
655
|
throw mapProviderError(err);
|
|
613
656
|
}
|
|
614
657
|
}
|
|
@@ -680,17 +723,25 @@ var OpenAIDriver = class {
|
|
|
680
723
|
this.client = new OpenAI({ apiKey });
|
|
681
724
|
return this.client;
|
|
682
725
|
}
|
|
683
|
-
async sendMessage(images, prompt) {
|
|
726
|
+
async sendMessage(images, prompt, options) {
|
|
684
727
|
const client = await this.getClient();
|
|
685
728
|
const imageBlocks = images.map((img) => ({
|
|
686
729
|
type: "input_image",
|
|
687
730
|
image_url: `data:${img.mimeType};base64,${img.base64}`
|
|
688
731
|
}));
|
|
689
732
|
try {
|
|
733
|
+
const format = options?.responseSchema ? {
|
|
734
|
+
type: "json_schema",
|
|
735
|
+
json_schema: {
|
|
736
|
+
name: "visual_ai_response",
|
|
737
|
+
strict: true,
|
|
738
|
+
schema: options.responseSchema
|
|
739
|
+
}
|
|
740
|
+
} : { type: "json_object" };
|
|
690
741
|
const requestParams = {
|
|
691
742
|
model: this.model,
|
|
692
743
|
max_output_tokens: this.maxTokens,
|
|
693
|
-
text: { format
|
|
744
|
+
text: { format },
|
|
694
745
|
input: [
|
|
695
746
|
{
|
|
696
747
|
role: "user",
|
|
@@ -702,15 +753,26 @@ var OpenAIDriver = class {
|
|
|
702
753
|
requestParams.reasoning = { effort: this.reasoningEffort };
|
|
703
754
|
}
|
|
704
755
|
const response = await client.responses.create(requestParams);
|
|
756
|
+
if (response.status && response.status !== "completed") {
|
|
757
|
+
const detail = response.incomplete_details?.reason ? ` (${response.incomplete_details.reason})` : "";
|
|
758
|
+
throw new VisualAITruncationError(
|
|
759
|
+
`Response truncated: OpenAI returned status "${response.status}"${detail}. The model exhausted the output token budget (${this.maxTokens} tokens). This commonly happens with higher reasoning effort levels. Increase maxTokens in your config (e.g., maxTokens: 16384) or lower reasoningEffort.`,
|
|
760
|
+
response.output_text ?? "",
|
|
761
|
+
this.maxTokens
|
|
762
|
+
);
|
|
763
|
+
}
|
|
705
764
|
const text = response.output_text ?? "";
|
|
765
|
+
const reasoningTokens = response.usage?.output_tokens_details?.reasoning_tokens;
|
|
706
766
|
return {
|
|
707
767
|
text,
|
|
708
768
|
usage: response.usage ? {
|
|
709
769
|
inputTokens: response.usage.input_tokens,
|
|
710
|
-
outputTokens: response.usage.output_tokens
|
|
770
|
+
outputTokens: response.usage.output_tokens,
|
|
771
|
+
...reasoningTokens !== void 0 && { reasoningTokens }
|
|
711
772
|
} : void 0
|
|
712
773
|
};
|
|
713
774
|
} catch (err) {
|
|
775
|
+
if (err instanceof VisualAITruncationError) throw err;
|
|
714
776
|
throw mapProviderError(err);
|
|
715
777
|
}
|
|
716
778
|
}
|
|
@@ -757,15 +819,6 @@ function parseBooleanEnv(envName, value) {
|
|
|
757
819
|
`Invalid ${envName} value: "${value}". Use "true", "1", "false", or "0".`
|
|
758
820
|
);
|
|
759
821
|
}
|
|
760
|
-
var VALID_REASONING_EFFORTS = ["low", "medium", "high", "xhigh"];
|
|
761
|
-
function parseReasoningEffortEnv(envName, value) {
|
|
762
|
-
if (value === void 0 || value === "") return void 0;
|
|
763
|
-
const lower = value.toLowerCase();
|
|
764
|
-
if (VALID_REASONING_EFFORTS.includes(lower)) return lower;
|
|
765
|
-
throw new VisualAIConfigError(
|
|
766
|
-
`Invalid ${envName} value: "${value}". Use "low", "medium", "high", or "xhigh".`
|
|
767
|
-
);
|
|
768
|
-
}
|
|
769
822
|
var debugDeprecationWarned = false;
|
|
770
823
|
function resolveConfig(config) {
|
|
771
824
|
const provider = resolveProvider(config);
|
|
@@ -780,12 +833,23 @@ function resolveConfig(config) {
|
|
|
780
833
|
`
|
|
781
834
|
);
|
|
782
835
|
}
|
|
836
|
+
const userSetMaxTokens = config.maxTokens !== void 0;
|
|
837
|
+
let maxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
|
|
838
|
+
if (!userSetMaxTokens && provider === "openai" && (config.reasoningEffort === "high" || config.reasoningEffort === "xhigh")) {
|
|
839
|
+
maxTokens = OPENAI_REASONING_MAX_TOKENS;
|
|
840
|
+
if (debug) {
|
|
841
|
+
process.stderr.write(
|
|
842
|
+
`[visual-ai-assertions] Auto-increased maxTokens from ${DEFAULT_MAX_TOKENS} to ${OPENAI_REASONING_MAX_TOKENS} for OpenAI with reasoningEffort "${config.reasoningEffort}".
|
|
843
|
+
`
|
|
844
|
+
);
|
|
845
|
+
}
|
|
846
|
+
}
|
|
783
847
|
return {
|
|
784
848
|
provider,
|
|
785
849
|
apiKey: config.apiKey,
|
|
786
850
|
model,
|
|
787
|
-
maxTokens
|
|
788
|
-
reasoningEffort: config.reasoningEffort
|
|
851
|
+
maxTokens,
|
|
852
|
+
reasoningEffort: config.reasoningEffort,
|
|
789
853
|
debug,
|
|
790
854
|
debugPrompt,
|
|
791
855
|
debugResponse,
|
|
@@ -836,6 +900,10 @@ var PRICING_TABLE = {
|
|
|
836
900
|
inputPricePerToken: 2 / PER_MILLION,
|
|
837
901
|
outputPricePerToken: 12 / PER_MILLION
|
|
838
902
|
},
|
|
903
|
+
[`${Provider.GOOGLE}:${Model.Google.GEMINI_3_1_FLASH_LITE_PREVIEW}`]: {
|
|
904
|
+
inputPricePerToken: 0.25 / PER_MILLION,
|
|
905
|
+
outputPricePerToken: 1.5 / PER_MILLION
|
|
906
|
+
},
|
|
839
907
|
[`${Provider.GOOGLE}:${Model.Google.GEMINI_3_FLASH_PREVIEW}`]: {
|
|
840
908
|
inputPricePerToken: 0.5 / PER_MILLION,
|
|
841
909
|
outputPricePerToken: 3 / PER_MILLION
|
|
@@ -860,8 +928,9 @@ function usageLog(config, method, usage) {
|
|
|
860
928
|
if (!config.trackUsage) return;
|
|
861
929
|
const costStr = usage.estimatedCost !== void 0 ? `$${usage.estimatedCost.toFixed(6)}` : "unknown";
|
|
862
930
|
const reasoningStr = config.reasoningEffort ? `reasoning: ${config.reasoningEffort}` : `reasoning: ${PROVIDER_DEFAULT_REASONING[config.provider]} (provider default)`;
|
|
931
|
+
const reasoningTokenStr = usage.reasoningTokens !== void 0 ? ` (${usage.reasoningTokens} reasoning)` : "";
|
|
863
932
|
process.stderr.write(
|
|
864
|
-
`[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
|
|
933
|
+
`[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output${reasoningTokenStr} tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
|
|
865
934
|
`
|
|
866
935
|
);
|
|
867
936
|
}
|
|
@@ -871,6 +940,7 @@ function processUsage(method, rawUsage, durationSeconds, config) {
|
|
|
871
940
|
const usage = {
|
|
872
941
|
inputTokens,
|
|
873
942
|
outputTokens,
|
|
943
|
+
...rawUsage?.reasoningTokens !== void 0 && { reasoningTokens: rawUsage.reasoningTokens },
|
|
874
944
|
estimatedCost: calculateCost(config.provider, config.model, inputTokens, outputTokens),
|
|
875
945
|
durationSeconds
|
|
876
946
|
};
|
|
@@ -879,6 +949,10 @@ function processUsage(method, rawUsage, durationSeconds, config) {
|
|
|
879
949
|
}
|
|
880
950
|
var MAX_RAW_RESPONSE_PREVIEW = 500;
|
|
881
951
|
function formatError(error) {
|
|
952
|
+
if (error instanceof VisualAITruncationError) {
|
|
953
|
+
const preview = error.partialResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.partialResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.partialResponse;
|
|
954
|
+
return `${error.name} (${error.code}): ${error.message}. Partial response: ${preview}`;
|
|
955
|
+
}
|
|
882
956
|
if (error instanceof VisualAIResponseParseError) {
|
|
883
957
|
const truncated = error.rawResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.rawResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.rawResponse;
|
|
884
958
|
return `${error.name} (${error.code}): ${error.message}. Raw (truncated): ${truncated}`;
|
|
@@ -899,9 +973,9 @@ async function withErrorDebug(config, method, fn) {
|
|
|
899
973
|
throw error;
|
|
900
974
|
}
|
|
901
975
|
}
|
|
902
|
-
async function timedSendMessage(driver, images, prompt) {
|
|
976
|
+
async function timedSendMessage(driver, images, prompt, options) {
|
|
903
977
|
const start = performance.now();
|
|
904
|
-
const response = await driver.sendMessage(images, prompt);
|
|
978
|
+
const response = await driver.sendMessage(images, prompt, options);
|
|
905
979
|
const durationSeconds = (performance.now() - start) / 1e3;
|
|
906
980
|
return { ...response, durationSeconds };
|
|
907
981
|
}
|
|
@@ -1141,6 +1215,8 @@ var StatementResultSchema = import_zod.z.object({
|
|
|
1141
1215
|
var UsageInfoSchema = import_zod.z.object({
|
|
1142
1216
|
inputTokens: import_zod.z.number(),
|
|
1143
1217
|
outputTokens: import_zod.z.number(),
|
|
1218
|
+
/** Reasoning/thinking tokens consumed by the model (informational, typically included within outputTokens). */
|
|
1219
|
+
reasoningTokens: import_zod.z.number().optional(),
|
|
1144
1220
|
estimatedCost: import_zod.z.number().optional(),
|
|
1145
1221
|
durationSeconds: import_zod.z.number().nonnegative().optional()
|
|
1146
1222
|
});
|
|
@@ -1194,8 +1270,24 @@ function parseResponse(raw, schema) {
|
|
|
1194
1270
|
}
|
|
1195
1271
|
return result.data;
|
|
1196
1272
|
}
|
|
1273
|
+
function reconcileCheckResult(result) {
|
|
1274
|
+
if (result.statements.length === 0) {
|
|
1275
|
+
return result;
|
|
1276
|
+
}
|
|
1277
|
+
const passCount = result.statements.filter((s) => s.pass).length;
|
|
1278
|
+
const total = result.statements.length;
|
|
1279
|
+
const computedPass = passCount === total;
|
|
1280
|
+
const countPrefix = `${passCount} of ${total} checks passed`;
|
|
1281
|
+
const reasoning = `${countPrefix}. ${result.reasoning}`;
|
|
1282
|
+
return {
|
|
1283
|
+
...result,
|
|
1284
|
+
pass: computedPass,
|
|
1285
|
+
reasoning
|
|
1286
|
+
};
|
|
1287
|
+
}
|
|
1197
1288
|
function parseCheckResponse(raw) {
|
|
1198
|
-
|
|
1289
|
+
const result = parseResponse(raw, CheckResponseSchema);
|
|
1290
|
+
return reconcileCheckResult(result);
|
|
1199
1291
|
}
|
|
1200
1292
|
function parseAskResponse(raw) {
|
|
1201
1293
|
return parseResponse(raw, AskResponseSchema);
|
|
@@ -1205,6 +1297,12 @@ function parseCompareResponse(raw) {
|
|
|
1205
1297
|
}
|
|
1206
1298
|
|
|
1207
1299
|
// src/core/client.ts
|
|
1300
|
+
var import_zod_to_json_schema = require("zod-to-json-schema");
|
|
1301
|
+
function toSchemaOptions(schema) {
|
|
1302
|
+
return {
|
|
1303
|
+
responseSchema: (0, import_zod_to_json_schema.zodToJsonSchema)(schema, { target: "openAi" })
|
|
1304
|
+
};
|
|
1305
|
+
}
|
|
1208
1306
|
var PROVIDER_REGISTRY = {
|
|
1209
1307
|
anthropic: (config) => new AnthropicDriver(config),
|
|
1210
1308
|
openai: (config) => new OpenAIDriver(config),
|
|
@@ -1213,6 +1311,9 @@ var PROVIDER_REGISTRY = {
|
|
|
1213
1311
|
function createDriver(provider, config) {
|
|
1214
1312
|
return PROVIDER_REGISTRY[provider](config);
|
|
1215
1313
|
}
|
|
1314
|
+
var checkSchemaOptions = toSchemaOptions(CheckResponseSchema);
|
|
1315
|
+
var askSchemaOptions = toSchemaOptions(AskResponseSchema);
|
|
1316
|
+
var compareSchemaOptions = toSchemaOptions(CompareResponseSchema);
|
|
1216
1317
|
function visualAI(config = {}) {
|
|
1217
1318
|
const resolvedConfig = resolveConfig(config);
|
|
1218
1319
|
const driverConfig = {
|
|
@@ -1231,7 +1332,7 @@ function visualAI(config = {}) {
|
|
|
1231
1332
|
const img = await normalizeImage(image);
|
|
1232
1333
|
const prompt = buildElementsVisibilityPrompt(elements, visible, options);
|
|
1233
1334
|
debugLog(resolvedConfig, `${methodName} prompt`, prompt, "prompt");
|
|
1234
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1335
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1235
1336
|
debugLog(resolvedConfig, `${methodName} response`, response.text, "response");
|
|
1236
1337
|
const result = parseCheckResponse(response.text);
|
|
1237
1338
|
return {
|
|
@@ -1250,7 +1351,7 @@ function visualAI(config = {}) {
|
|
|
1250
1351
|
const img = await normalizeImage(image);
|
|
1251
1352
|
const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
|
|
1252
1353
|
debugLog(resolvedConfig, "check prompt", prompt, "prompt");
|
|
1253
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1354
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1254
1355
|
debugLog(resolvedConfig, "check response", response.text, "response");
|
|
1255
1356
|
const result = parseCheckResponse(response.text);
|
|
1256
1357
|
return {
|
|
@@ -1264,7 +1365,7 @@ function visualAI(config = {}) {
|
|
|
1264
1365
|
const img = await normalizeImage(image);
|
|
1265
1366
|
const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
|
|
1266
1367
|
debugLog(resolvedConfig, "ask prompt", prompt, "prompt");
|
|
1267
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1368
|
+
const response = await timedSendMessage(driver, [img], prompt, askSchemaOptions);
|
|
1268
1369
|
debugLog(resolvedConfig, "ask response", response.text, "response");
|
|
1269
1370
|
const result = parseAskResponse(response.text);
|
|
1270
1371
|
return {
|
|
@@ -1281,7 +1382,7 @@ function visualAI(config = {}) {
|
|
|
1281
1382
|
instructions: options?.instructions
|
|
1282
1383
|
});
|
|
1283
1384
|
debugLog(resolvedConfig, "compare prompt", prompt, "prompt");
|
|
1284
|
-
const response = await timedSendMessage(driver, [imgA, imgB], prompt);
|
|
1385
|
+
const response = await timedSendMessage(driver, [imgA, imgB], prompt, compareSchemaOptions);
|
|
1285
1386
|
debugLog(resolvedConfig, "compare response", response.text, "response");
|
|
1286
1387
|
const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
|
|
1287
1388
|
const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
|
|
@@ -1316,7 +1417,7 @@ function visualAI(config = {}) {
|
|
|
1316
1417
|
const img = await normalizeImage(image);
|
|
1317
1418
|
const prompt = buildAccessibilityPrompt(options);
|
|
1318
1419
|
debugLog(resolvedConfig, "accessibility prompt", prompt, "prompt");
|
|
1319
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1420
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1320
1421
|
debugLog(resolvedConfig, "accessibility response", response.text, "response");
|
|
1321
1422
|
const result = parseCheckResponse(response.text);
|
|
1322
1423
|
return {
|
|
@@ -1335,7 +1436,7 @@ function visualAI(config = {}) {
|
|
|
1335
1436
|
const img = await normalizeImage(image);
|
|
1336
1437
|
const prompt = buildLayoutPrompt(options);
|
|
1337
1438
|
debugLog(resolvedConfig, "layout prompt", prompt, "prompt");
|
|
1338
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1439
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1339
1440
|
debugLog(resolvedConfig, "layout response", response.text, "response");
|
|
1340
1441
|
const result = parseCheckResponse(response.text);
|
|
1341
1442
|
return {
|
|
@@ -1349,7 +1450,7 @@ function visualAI(config = {}) {
|
|
|
1349
1450
|
const img = await normalizeImage(image);
|
|
1350
1451
|
const prompt = buildPageLoadPrompt(options);
|
|
1351
1452
|
debugLog(resolvedConfig, "pageLoad prompt", prompt, "prompt");
|
|
1352
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1453
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1353
1454
|
debugLog(resolvedConfig, "pageLoad response", response.text, "response");
|
|
1354
1455
|
const result = parseCheckResponse(response.text);
|
|
1355
1456
|
return {
|
|
@@ -1363,7 +1464,7 @@ function visualAI(config = {}) {
|
|
|
1363
1464
|
const img = await normalizeImage(image);
|
|
1364
1465
|
const prompt = buildContentPrompt(options);
|
|
1365
1466
|
debugLog(resolvedConfig, "content prompt", prompt, "prompt");
|
|
1366
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1467
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1367
1468
|
debugLog(resolvedConfig, "content response", response.text, "response");
|
|
1368
1469
|
const result = parseCheckResponse(response.text);
|
|
1369
1470
|
return {
|
|
@@ -1455,6 +1556,7 @@ function assertVisualCompareResult(result, label) {
|
|
|
1455
1556
|
VisualAIProviderError,
|
|
1456
1557
|
VisualAIRateLimitError,
|
|
1457
1558
|
VisualAIResponseParseError,
|
|
1559
|
+
VisualAITruncationError,
|
|
1458
1560
|
assertVisualCompareResult,
|
|
1459
1561
|
assertVisualResult,
|
|
1460
1562
|
formatCheckResult,
|