visual-ai-assertions 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -18
- package/dist/index.cjs +152 -42
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +53 -6
- package/dist/index.d.ts +53 -6
- package/dist/index.js +150 -42
- package/dist/index.js.map +1 -1
- package/package.json +3 -2
package/README.md
CHANGED
|
@@ -387,14 +387,13 @@ The `VisualAIKnownError` union and `isVisualAIKnownError()` helper are useful wh
|
|
|
387
387
|
|
|
388
388
|
### Optional Configuration
|
|
389
389
|
|
|
390
|
-
| Variable
|
|
391
|
-
|
|
|
392
|
-
| `VISUAL_AI_MODEL`
|
|
393
|
-
| `VISUAL_AI_DEBUG`
|
|
394
|
-
| `VISUAL_AI_DEBUG_PROMPT`
|
|
395
|
-
| `VISUAL_AI_DEBUG_RESPONSE`
|
|
396
|
-
| `
|
|
397
|
-
| `VISUAL_AI_TRACK_USAGE` | Enable usage tracking (token counts and cost) to stderr. Use `"true"` or `"1"`. |
|
|
390
|
+
| Variable | Description |
|
|
391
|
+
| -------------------------- | -------------------------------------------------------------------------------------------------------------- |
|
|
392
|
+
| `VISUAL_AI_MODEL` | Default model when `model` is not set in config. Overrides the provider's default model. |
|
|
393
|
+
| `VISUAL_AI_DEBUG` | Enable error diagnostic logging to stderr. Does **not** enable prompt/response logging. Use `"true"` or `"1"`. |
|
|
394
|
+
| `VISUAL_AI_DEBUG_PROMPT` | Enable prompt-only debug logging to stderr. Use `"true"` or `"1"`. |
|
|
395
|
+
| `VISUAL_AI_DEBUG_RESPONSE` | Enable response-only debug logging to stderr. Use `"true"` or `"1"`. |
|
|
396
|
+
| `VISUAL_AI_TRACK_USAGE` | Enable usage tracking (token counts and cost) to stderr. Use `"true"` or `"1"`. |
|
|
398
397
|
|
|
399
398
|
## Configuration
|
|
400
399
|
|
|
@@ -468,19 +467,22 @@ All listed models support image/vision input. Pass any model ID to the `model` c
|
|
|
468
467
|
|
|
469
468
|
### OpenAI
|
|
470
469
|
|
|
471
|
-
| Model
|
|
472
|
-
|
|
|
473
|
-
| GPT-5.4 Pro
|
|
474
|
-
| GPT-5.4
|
|
475
|
-
| GPT-5.2
|
|
476
|
-
| GPT-5 mini
|
|
470
|
+
| Model | Model ID | Input $/MTok | Output $/MTok | Notes |
|
|
471
|
+
| ------------ | -------------- | ------------ | ------------- | ------------------------------ |
|
|
472
|
+
| GPT-5.4 Pro | `gpt-5.4-pro` | $30 | $180 | Most capable, extended context |
|
|
473
|
+
| GPT-5.4 | `gpt-5.4` | $2.50 | $15 | Best vision quality |
|
|
474
|
+
| GPT-5.2 | `gpt-5.2` | $1.75 | $14 | Balanced quality and cost |
|
|
475
|
+
| GPT-5.4 mini | `gpt-5.4-mini` | $0.75 | $4.50 | Fast and affordable |
|
|
476
|
+
| GPT-5.4 nano | `gpt-5.4-nano` | $0.20 | $1.25 | Cheapest OpenAI option |
|
|
477
|
+
| GPT-5 mini | `gpt-5-mini` | $0.25 | $2 | **Default** — fast and cheap |
|
|
477
478
|
|
|
478
479
|
### Google
|
|
479
480
|
|
|
480
|
-
| Model
|
|
481
|
-
|
|
|
482
|
-
| Gemini 3.1 Pro
|
|
483
|
-
| Gemini 3 Flash | `gemini-3-flash-preview` | $0.
|
|
481
|
+
| Model | Model ID | Input $/MTok | Output $/MTok | Notes |
|
|
482
|
+
| --------------------- | ------------------------------- | ------------ | ------------- | --------------------------------- |
|
|
483
|
+
| Gemini 3.1 Pro | `gemini-3.1-pro-preview` | $2 | $12 | Preview — most advanced reasoning |
|
|
484
|
+
| Gemini 3.1 Flash Lite | `gemini-3.1-flash-lite-preview` | $0.25 | $1.50 | Preview — lightweight and cheap |
|
|
485
|
+
| Gemini 3 Flash | `gemini-3-flash-preview` | $0.50 | $3 | **Default** — fast and capable |
|
|
484
486
|
|
|
485
487
|
## License
|
|
486
488
|
|
package/dist/index.cjs
CHANGED
|
@@ -44,6 +44,7 @@ __export(index_exports, {
|
|
|
44
44
|
Layout: () => Layout,
|
|
45
45
|
Model: () => Model,
|
|
46
46
|
Provider: () => Provider,
|
|
47
|
+
ReasoningEffort: () => ReasoningEffort,
|
|
47
48
|
StatementResultSchema: () => StatementResultSchema,
|
|
48
49
|
UsageInfoSchema: () => UsageInfoSchema,
|
|
49
50
|
VisualAIAssertionError: () => VisualAIAssertionError,
|
|
@@ -54,6 +55,7 @@ __export(index_exports, {
|
|
|
54
55
|
VisualAIProviderError: () => VisualAIProviderError,
|
|
55
56
|
VisualAIRateLimitError: () => VisualAIRateLimitError,
|
|
56
57
|
VisualAIResponseParseError: () => VisualAIResponseParseError,
|
|
58
|
+
VisualAITruncationError: () => VisualAITruncationError,
|
|
57
59
|
assertVisualCompareResult: () => assertVisualCompareResult,
|
|
58
60
|
assertVisualResult: () => assertVisualResult,
|
|
59
61
|
formatCheckResult: () => formatCheckResult,
|
|
@@ -64,6 +66,12 @@ __export(index_exports, {
|
|
|
64
66
|
module.exports = __toCommonJS(index_exports);
|
|
65
67
|
|
|
66
68
|
// src/constants.ts
|
|
69
|
+
var ReasoningEffort = {
|
|
70
|
+
LOW: "low",
|
|
71
|
+
MEDIUM: "medium",
|
|
72
|
+
HIGH: "high",
|
|
73
|
+
XHIGH: "xhigh"
|
|
74
|
+
};
|
|
67
75
|
var Provider = {
|
|
68
76
|
ANTHROPIC: "anthropic",
|
|
69
77
|
OPENAI: "openai",
|
|
@@ -85,6 +93,7 @@ var Model = {
|
|
|
85
93
|
},
|
|
86
94
|
Google: {
|
|
87
95
|
GEMINI_3_1_PRO_PREVIEW: "gemini-3.1-pro-preview",
|
|
96
|
+
GEMINI_3_1_FLASH_LITE_PREVIEW: "gemini-3.1-flash-lite-preview",
|
|
88
97
|
GEMINI_3_FLASH_PREVIEW: "gemini-3-flash-preview"
|
|
89
98
|
}
|
|
90
99
|
};
|
|
@@ -94,6 +103,7 @@ var DEFAULT_MODELS = {
|
|
|
94
103
|
[Provider.GOOGLE]: Model.Google.GEMINI_3_FLASH_PREVIEW
|
|
95
104
|
};
|
|
96
105
|
var DEFAULT_MAX_TOKENS = 4096;
|
|
106
|
+
var OPENAI_REASONING_MAX_TOKENS = 16384;
|
|
97
107
|
var MODEL_TO_PROVIDER = new Map([
|
|
98
108
|
...Object.values(Model.Anthropic).map((m) => [m, Provider.ANTHROPIC]),
|
|
99
109
|
...Object.values(Model.OpenAI).map((m) => [m, Provider.OPENAI]),
|
|
@@ -178,6 +188,16 @@ var VisualAIResponseParseError = class extends VisualAIError {
|
|
|
178
188
|
this.rawResponse = rawResponse;
|
|
179
189
|
}
|
|
180
190
|
};
|
|
191
|
+
var VisualAITruncationError = class extends VisualAIError {
|
|
192
|
+
partialResponse;
|
|
193
|
+
maxTokens;
|
|
194
|
+
constructor(message, partialResponse, maxTokens) {
|
|
195
|
+
super(message, "RESPONSE_TRUNCATED");
|
|
196
|
+
this.name = "VisualAITruncationError";
|
|
197
|
+
this.partialResponse = partialResponse;
|
|
198
|
+
this.maxTokens = maxTokens;
|
|
199
|
+
}
|
|
200
|
+
};
|
|
181
201
|
var VisualAIConfigError = class extends VisualAIError {
|
|
182
202
|
constructor(message) {
|
|
183
203
|
super(message, "CONFIG_INVALID");
|
|
@@ -193,7 +213,7 @@ var VisualAIAssertionError = class extends VisualAIError {
|
|
|
193
213
|
}
|
|
194
214
|
};
|
|
195
215
|
function isVisualAIKnownError(error) {
|
|
196
|
-
return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
|
|
216
|
+
return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAITruncationError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
|
|
197
217
|
}
|
|
198
218
|
|
|
199
219
|
// src/core/prompt.ts
|
|
@@ -207,12 +227,18 @@ Each issue must have:
|
|
|
207
227
|
- "description": what the issue is
|
|
208
228
|
- "suggestion": how to fix or improve it
|
|
209
229
|
`;
|
|
210
|
-
var CHECK_OUTPUT_SCHEMA = `
|
|
230
|
+
var CHECK_OUTPUT_SCHEMA = `IMPORTANT: Follow this evaluation order:
|
|
231
|
+
1. First, evaluate EACH statement independently and populate the "statements" array
|
|
232
|
+
2. Then, set "pass" to true ONLY if every statement passed (logical AND of all statement results)
|
|
233
|
+
3. Write "reasoning" as a brief overall summary of the evaluation
|
|
234
|
+
4. Include "issues" only for statements that failed
|
|
235
|
+
|
|
236
|
+
Respond with a JSON object matching this exact structure:
|
|
211
237
|
{
|
|
212
|
-
"pass": boolean, // true ONLY if ALL statements
|
|
213
|
-
"reasoning": string, // brief overall summary
|
|
214
|
-
"issues": [...], //
|
|
215
|
-
"statements": [ // one entry per statement, in order
|
|
238
|
+
"pass": boolean, // true ONLY if ALL statements passed \u2014 derive from statements array
|
|
239
|
+
"reasoning": string, // brief overall summary of the evaluation
|
|
240
|
+
"issues": [...], // one issue per failing statement (empty if all pass)
|
|
241
|
+
"statements": [ // one entry per statement, in order \u2014 evaluate these FIRST
|
|
216
242
|
{
|
|
217
243
|
"statement": string, // the original statement text
|
|
218
244
|
"pass": boolean, // whether this statement is true
|
|
@@ -231,7 +257,7 @@ Only include issues for statements that fail. If all statements pass, issues sho
|
|
|
231
257
|
Example for a failing check:
|
|
232
258
|
{
|
|
233
259
|
"pass": false,
|
|
234
|
-
"reasoning": "
|
|
260
|
+
"reasoning": "The submit button is not visible on the page.",
|
|
235
261
|
"issues": [
|
|
236
262
|
{ "priority": "major", "category": "missing-element", "description": "Submit button is not visible on the page", "suggestion": "Verify the submit button component is rendered and not hidden by CSS" }
|
|
237
263
|
],
|
|
@@ -491,7 +517,7 @@ var AnthropicDriver = class {
|
|
|
491
517
|
this.client = new Anthropic({ apiKey });
|
|
492
518
|
return this.client;
|
|
493
519
|
}
|
|
494
|
-
async sendMessage(images, prompt) {
|
|
520
|
+
async sendMessage(images, prompt, _options) {
|
|
495
521
|
const client = await this.getClient();
|
|
496
522
|
const imageBlocks = images.map((img) => ({
|
|
497
523
|
type: "image",
|
|
@@ -521,6 +547,13 @@ var AnthropicDriver = class {
|
|
|
521
547
|
const message = await client.messages.create(requestParams);
|
|
522
548
|
const textBlock = message.content.find((block) => block.type === "text");
|
|
523
549
|
const text = textBlock?.text ?? "";
|
|
550
|
+
if (message.stop_reason === "max_tokens") {
|
|
551
|
+
throw new VisualAITruncationError(
|
|
552
|
+
`Response truncated: Anthropic stopped due to max_tokens limit (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
|
|
553
|
+
text,
|
|
554
|
+
this.maxTokens
|
|
555
|
+
);
|
|
556
|
+
}
|
|
524
557
|
return {
|
|
525
558
|
text,
|
|
526
559
|
usage: {
|
|
@@ -529,6 +562,7 @@ var AnthropicDriver = class {
|
|
|
529
562
|
}
|
|
530
563
|
};
|
|
531
564
|
} catch (err) {
|
|
565
|
+
if (err instanceof VisualAITruncationError) throw err;
|
|
532
566
|
throw mapProviderError(err);
|
|
533
567
|
}
|
|
534
568
|
}
|
|
@@ -540,11 +574,11 @@ function needsCodeExecution(model) {
|
|
|
540
574
|
const match = model.match(/^gemini-(\d+)/);
|
|
541
575
|
return match !== null && match[1] !== void 0 && parseInt(match[1], 10) >= 3;
|
|
542
576
|
}
|
|
543
|
-
var
|
|
544
|
-
low:
|
|
545
|
-
medium:
|
|
546
|
-
high:
|
|
547
|
-
xhigh:
|
|
577
|
+
var GOOGLE_THINKING_LEVEL = {
|
|
578
|
+
low: "minimal",
|
|
579
|
+
medium: "low",
|
|
580
|
+
high: "medium",
|
|
581
|
+
xhigh: "high"
|
|
548
582
|
};
|
|
549
583
|
var GoogleDriver = class {
|
|
550
584
|
client;
|
|
@@ -584,7 +618,7 @@ var GoogleDriver = class {
|
|
|
584
618
|
this.client = new GoogleGenAI({ apiKey });
|
|
585
619
|
return this.client;
|
|
586
620
|
}
|
|
587
|
-
async sendMessage(images, prompt) {
|
|
621
|
+
async sendMessage(images, prompt, _options) {
|
|
588
622
|
const client = await this.getClient();
|
|
589
623
|
try {
|
|
590
624
|
const response = await client.models.generateContent({
|
|
@@ -595,20 +629,36 @@ var GoogleDriver = class {
|
|
|
595
629
|
maxOutputTokens: this.maxTokens,
|
|
596
630
|
...this.reasoningEffort && {
|
|
597
631
|
thinkingConfig: {
|
|
598
|
-
|
|
632
|
+
thinkingLevel: GOOGLE_THINKING_LEVEL[this.reasoningEffort]
|
|
599
633
|
}
|
|
600
634
|
}
|
|
601
635
|
}
|
|
602
636
|
});
|
|
637
|
+
const finishReason = response.candidates?.[0]?.finishReason;
|
|
638
|
+
if (finishReason === "MAX_TOKENS") {
|
|
639
|
+
throw new VisualAITruncationError(
|
|
640
|
+
`Response truncated: Google returned finishReason "MAX_TOKENS". The model exhausted the output token budget (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
|
|
641
|
+
response.text ?? "",
|
|
642
|
+
this.maxTokens
|
|
643
|
+
);
|
|
644
|
+
}
|
|
645
|
+
if (finishReason && finishReason !== "STOP") {
|
|
646
|
+
throw new VisualAIProviderError(
|
|
647
|
+
`Response blocked: Google returned finishReason "${finishReason}".`
|
|
648
|
+
);
|
|
649
|
+
}
|
|
603
650
|
const text = response.text ?? "";
|
|
651
|
+
const thoughtsTokenCount = response.usageMetadata?.thoughtsTokenCount;
|
|
604
652
|
return {
|
|
605
653
|
text,
|
|
606
654
|
usage: response.usageMetadata ? {
|
|
607
655
|
inputTokens: response.usageMetadata.promptTokenCount ?? 0,
|
|
608
|
-
outputTokens: response.usageMetadata.candidatesTokenCount ?? 0
|
|
656
|
+
outputTokens: response.usageMetadata.candidatesTokenCount ?? 0,
|
|
657
|
+
...thoughtsTokenCount !== void 0 && { reasoningTokens: thoughtsTokenCount }
|
|
609
658
|
} : void 0
|
|
610
659
|
};
|
|
611
660
|
} catch (err) {
|
|
661
|
+
if (err instanceof VisualAITruncationError || err instanceof VisualAIProviderError) throw err;
|
|
612
662
|
throw mapProviderError(err);
|
|
613
663
|
}
|
|
614
664
|
}
|
|
@@ -680,17 +730,25 @@ var OpenAIDriver = class {
|
|
|
680
730
|
this.client = new OpenAI({ apiKey });
|
|
681
731
|
return this.client;
|
|
682
732
|
}
|
|
683
|
-
async sendMessage(images, prompt) {
|
|
733
|
+
async sendMessage(images, prompt, options) {
|
|
684
734
|
const client = await this.getClient();
|
|
685
735
|
const imageBlocks = images.map((img) => ({
|
|
686
736
|
type: "input_image",
|
|
687
737
|
image_url: `data:${img.mimeType};base64,${img.base64}`
|
|
688
738
|
}));
|
|
689
739
|
try {
|
|
740
|
+
const format = options?.responseSchema ? {
|
|
741
|
+
type: "json_schema",
|
|
742
|
+
json_schema: {
|
|
743
|
+
name: "visual_ai_response",
|
|
744
|
+
strict: true,
|
|
745
|
+
schema: options.responseSchema
|
|
746
|
+
}
|
|
747
|
+
} : { type: "json_object", name: "visual_ai_response" };
|
|
690
748
|
const requestParams = {
|
|
691
749
|
model: this.model,
|
|
692
750
|
max_output_tokens: this.maxTokens,
|
|
693
|
-
text: { format
|
|
751
|
+
text: { format },
|
|
694
752
|
input: [
|
|
695
753
|
{
|
|
696
754
|
role: "user",
|
|
@@ -702,15 +760,26 @@ var OpenAIDriver = class {
|
|
|
702
760
|
requestParams.reasoning = { effort: this.reasoningEffort };
|
|
703
761
|
}
|
|
704
762
|
const response = await client.responses.create(requestParams);
|
|
763
|
+
if (response.status && response.status !== "completed") {
|
|
764
|
+
const detail = response.incomplete_details?.reason ? ` (${response.incomplete_details.reason})` : "";
|
|
765
|
+
throw new VisualAITruncationError(
|
|
766
|
+
`Response truncated: OpenAI returned status "${response.status}"${detail}. The model exhausted the output token budget (${this.maxTokens} tokens). This commonly happens with higher reasoning effort levels. Increase maxTokens in your config (e.g., maxTokens: 16384) or lower reasoningEffort.`,
|
|
767
|
+
response.output_text ?? "",
|
|
768
|
+
this.maxTokens
|
|
769
|
+
);
|
|
770
|
+
}
|
|
705
771
|
const text = response.output_text ?? "";
|
|
772
|
+
const reasoningTokens = response.usage?.output_tokens_details?.reasoning_tokens;
|
|
706
773
|
return {
|
|
707
774
|
text,
|
|
708
775
|
usage: response.usage ? {
|
|
709
776
|
inputTokens: response.usage.input_tokens,
|
|
710
|
-
outputTokens: response.usage.output_tokens
|
|
777
|
+
outputTokens: response.usage.output_tokens,
|
|
778
|
+
...reasoningTokens !== void 0 && { reasoningTokens }
|
|
711
779
|
} : void 0
|
|
712
780
|
};
|
|
713
781
|
} catch (err) {
|
|
782
|
+
if (err instanceof VisualAITruncationError) throw err;
|
|
714
783
|
throw mapProviderError(err);
|
|
715
784
|
}
|
|
716
785
|
}
|
|
@@ -757,15 +826,6 @@ function parseBooleanEnv(envName, value) {
|
|
|
757
826
|
`Invalid ${envName} value: "${value}". Use "true", "1", "false", or "0".`
|
|
758
827
|
);
|
|
759
828
|
}
|
|
760
|
-
var VALID_REASONING_EFFORTS = ["low", "medium", "high", "xhigh"];
|
|
761
|
-
function parseReasoningEffortEnv(envName, value) {
|
|
762
|
-
if (value === void 0 || value === "") return void 0;
|
|
763
|
-
const lower = value.toLowerCase();
|
|
764
|
-
if (VALID_REASONING_EFFORTS.includes(lower)) return lower;
|
|
765
|
-
throw new VisualAIConfigError(
|
|
766
|
-
`Invalid ${envName} value: "${value}". Use "low", "medium", "high", or "xhigh".`
|
|
767
|
-
);
|
|
768
|
-
}
|
|
769
829
|
var debugDeprecationWarned = false;
|
|
770
830
|
function resolveConfig(config) {
|
|
771
831
|
const provider = resolveProvider(config);
|
|
@@ -780,12 +840,23 @@ function resolveConfig(config) {
|
|
|
780
840
|
`
|
|
781
841
|
);
|
|
782
842
|
}
|
|
843
|
+
const userSetMaxTokens = config.maxTokens !== void 0;
|
|
844
|
+
let maxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
|
|
845
|
+
if (!userSetMaxTokens && provider === "openai" && (config.reasoningEffort === "high" || config.reasoningEffort === "xhigh")) {
|
|
846
|
+
maxTokens = OPENAI_REASONING_MAX_TOKENS;
|
|
847
|
+
if (debug) {
|
|
848
|
+
process.stderr.write(
|
|
849
|
+
`[visual-ai-assertions] Auto-increased maxTokens from ${DEFAULT_MAX_TOKENS} to ${OPENAI_REASONING_MAX_TOKENS} for OpenAI with reasoningEffort "${config.reasoningEffort}".
|
|
850
|
+
`
|
|
851
|
+
);
|
|
852
|
+
}
|
|
853
|
+
}
|
|
783
854
|
return {
|
|
784
855
|
provider,
|
|
785
856
|
apiKey: config.apiKey,
|
|
786
857
|
model,
|
|
787
|
-
maxTokens
|
|
788
|
-
reasoningEffort: config.reasoningEffort
|
|
858
|
+
maxTokens,
|
|
859
|
+
reasoningEffort: config.reasoningEffort,
|
|
789
860
|
debug,
|
|
790
861
|
debugPrompt,
|
|
791
862
|
debugResponse,
|
|
@@ -836,6 +907,10 @@ var PRICING_TABLE = {
|
|
|
836
907
|
inputPricePerToken: 2 / PER_MILLION,
|
|
837
908
|
outputPricePerToken: 12 / PER_MILLION
|
|
838
909
|
},
|
|
910
|
+
[`${Provider.GOOGLE}:${Model.Google.GEMINI_3_1_FLASH_LITE_PREVIEW}`]: {
|
|
911
|
+
inputPricePerToken: 0.25 / PER_MILLION,
|
|
912
|
+
outputPricePerToken: 1.5 / PER_MILLION
|
|
913
|
+
},
|
|
839
914
|
[`${Provider.GOOGLE}:${Model.Google.GEMINI_3_FLASH_PREVIEW}`]: {
|
|
840
915
|
inputPricePerToken: 0.5 / PER_MILLION,
|
|
841
916
|
outputPricePerToken: 3 / PER_MILLION
|
|
@@ -860,8 +935,9 @@ function usageLog(config, method, usage) {
|
|
|
860
935
|
if (!config.trackUsage) return;
|
|
861
936
|
const costStr = usage.estimatedCost !== void 0 ? `$${usage.estimatedCost.toFixed(6)}` : "unknown";
|
|
862
937
|
const reasoningStr = config.reasoningEffort ? `reasoning: ${config.reasoningEffort}` : `reasoning: ${PROVIDER_DEFAULT_REASONING[config.provider]} (provider default)`;
|
|
938
|
+
const reasoningTokenStr = usage.reasoningTokens !== void 0 ? ` (${usage.reasoningTokens} reasoning)` : "";
|
|
863
939
|
process.stderr.write(
|
|
864
|
-
`[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
|
|
940
|
+
`[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output${reasoningTokenStr} tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
|
|
865
941
|
`
|
|
866
942
|
);
|
|
867
943
|
}
|
|
@@ -871,6 +947,7 @@ function processUsage(method, rawUsage, durationSeconds, config) {
|
|
|
871
947
|
const usage = {
|
|
872
948
|
inputTokens,
|
|
873
949
|
outputTokens,
|
|
950
|
+
...rawUsage?.reasoningTokens !== void 0 && { reasoningTokens: rawUsage.reasoningTokens },
|
|
874
951
|
estimatedCost: calculateCost(config.provider, config.model, inputTokens, outputTokens),
|
|
875
952
|
durationSeconds
|
|
876
953
|
};
|
|
@@ -879,6 +956,10 @@ function processUsage(method, rawUsage, durationSeconds, config) {
|
|
|
879
956
|
}
|
|
880
957
|
var MAX_RAW_RESPONSE_PREVIEW = 500;
|
|
881
958
|
function formatError(error) {
|
|
959
|
+
if (error instanceof VisualAITruncationError) {
|
|
960
|
+
const preview = error.partialResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.partialResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.partialResponse;
|
|
961
|
+
return `${error.name} (${error.code}): ${error.message}. Partial response: ${preview}`;
|
|
962
|
+
}
|
|
882
963
|
if (error instanceof VisualAIResponseParseError) {
|
|
883
964
|
const truncated = error.rawResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.rawResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.rawResponse;
|
|
884
965
|
return `${error.name} (${error.code}): ${error.message}. Raw (truncated): ${truncated}`;
|
|
@@ -899,9 +980,9 @@ async function withErrorDebug(config, method, fn) {
|
|
|
899
980
|
throw error;
|
|
900
981
|
}
|
|
901
982
|
}
|
|
902
|
-
async function timedSendMessage(driver, images, prompt) {
|
|
983
|
+
async function timedSendMessage(driver, images, prompt, options) {
|
|
903
984
|
const start = performance.now();
|
|
904
|
-
const response = await driver.sendMessage(images, prompt);
|
|
985
|
+
const response = await driver.sendMessage(images, prompt, options);
|
|
905
986
|
const durationSeconds = (performance.now() - start) / 1e3;
|
|
906
987
|
return { ...response, durationSeconds };
|
|
907
988
|
}
|
|
@@ -1141,6 +1222,8 @@ var StatementResultSchema = import_zod.z.object({
|
|
|
1141
1222
|
var UsageInfoSchema = import_zod.z.object({
|
|
1142
1223
|
inputTokens: import_zod.z.number(),
|
|
1143
1224
|
outputTokens: import_zod.z.number(),
|
|
1225
|
+
/** Reasoning/thinking tokens consumed by the model (informational, typically included within outputTokens). */
|
|
1226
|
+
reasoningTokens: import_zod.z.number().optional(),
|
|
1144
1227
|
estimatedCost: import_zod.z.number().optional(),
|
|
1145
1228
|
durationSeconds: import_zod.z.number().nonnegative().optional()
|
|
1146
1229
|
});
|
|
@@ -1194,8 +1277,24 @@ function parseResponse(raw, schema) {
|
|
|
1194
1277
|
}
|
|
1195
1278
|
return result.data;
|
|
1196
1279
|
}
|
|
1280
|
+
function reconcileCheckResult(result) {
|
|
1281
|
+
if (result.statements.length === 0) {
|
|
1282
|
+
return result;
|
|
1283
|
+
}
|
|
1284
|
+
const passCount = result.statements.filter((s) => s.pass).length;
|
|
1285
|
+
const total = result.statements.length;
|
|
1286
|
+
const computedPass = passCount === total;
|
|
1287
|
+
const countPrefix = `${passCount} of ${total} checks passed`;
|
|
1288
|
+
const reasoning = `${countPrefix}. ${result.reasoning}`;
|
|
1289
|
+
return {
|
|
1290
|
+
...result,
|
|
1291
|
+
pass: computedPass,
|
|
1292
|
+
reasoning
|
|
1293
|
+
};
|
|
1294
|
+
}
|
|
1197
1295
|
function parseCheckResponse(raw) {
|
|
1198
|
-
|
|
1296
|
+
const result = parseResponse(raw, CheckResponseSchema);
|
|
1297
|
+
return reconcileCheckResult(result);
|
|
1199
1298
|
}
|
|
1200
1299
|
function parseAskResponse(raw) {
|
|
1201
1300
|
return parseResponse(raw, AskResponseSchema);
|
|
@@ -1205,6 +1304,12 @@ function parseCompareResponse(raw) {
|
|
|
1205
1304
|
}
|
|
1206
1305
|
|
|
1207
1306
|
// src/core/client.ts
|
|
1307
|
+
var import_zod_to_json_schema = require("zod-to-json-schema");
|
|
1308
|
+
function toSchemaOptions(schema) {
|
|
1309
|
+
return {
|
|
1310
|
+
responseSchema: (0, import_zod_to_json_schema.zodToJsonSchema)(schema, { target: "openAi" })
|
|
1311
|
+
};
|
|
1312
|
+
}
|
|
1208
1313
|
var PROVIDER_REGISTRY = {
|
|
1209
1314
|
anthropic: (config) => new AnthropicDriver(config),
|
|
1210
1315
|
openai: (config) => new OpenAIDriver(config),
|
|
@@ -1213,6 +1318,9 @@ var PROVIDER_REGISTRY = {
|
|
|
1213
1318
|
function createDriver(provider, config) {
|
|
1214
1319
|
return PROVIDER_REGISTRY[provider](config);
|
|
1215
1320
|
}
|
|
1321
|
+
var checkSchemaOptions = toSchemaOptions(CheckResponseSchema);
|
|
1322
|
+
var askSchemaOptions = toSchemaOptions(AskResponseSchema);
|
|
1323
|
+
var compareSchemaOptions = toSchemaOptions(CompareResponseSchema);
|
|
1216
1324
|
function visualAI(config = {}) {
|
|
1217
1325
|
const resolvedConfig = resolveConfig(config);
|
|
1218
1326
|
const driverConfig = {
|
|
@@ -1231,7 +1339,7 @@ function visualAI(config = {}) {
|
|
|
1231
1339
|
const img = await normalizeImage(image);
|
|
1232
1340
|
const prompt = buildElementsVisibilityPrompt(elements, visible, options);
|
|
1233
1341
|
debugLog(resolvedConfig, `${methodName} prompt`, prompt, "prompt");
|
|
1234
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1342
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1235
1343
|
debugLog(resolvedConfig, `${methodName} response`, response.text, "response");
|
|
1236
1344
|
const result = parseCheckResponse(response.text);
|
|
1237
1345
|
return {
|
|
@@ -1250,7 +1358,7 @@ function visualAI(config = {}) {
|
|
|
1250
1358
|
const img = await normalizeImage(image);
|
|
1251
1359
|
const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
|
|
1252
1360
|
debugLog(resolvedConfig, "check prompt", prompt, "prompt");
|
|
1253
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1361
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1254
1362
|
debugLog(resolvedConfig, "check response", response.text, "response");
|
|
1255
1363
|
const result = parseCheckResponse(response.text);
|
|
1256
1364
|
return {
|
|
@@ -1264,7 +1372,7 @@ function visualAI(config = {}) {
|
|
|
1264
1372
|
const img = await normalizeImage(image);
|
|
1265
1373
|
const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
|
|
1266
1374
|
debugLog(resolvedConfig, "ask prompt", prompt, "prompt");
|
|
1267
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1375
|
+
const response = await timedSendMessage(driver, [img], prompt, askSchemaOptions);
|
|
1268
1376
|
debugLog(resolvedConfig, "ask response", response.text, "response");
|
|
1269
1377
|
const result = parseAskResponse(response.text);
|
|
1270
1378
|
return {
|
|
@@ -1281,7 +1389,7 @@ function visualAI(config = {}) {
|
|
|
1281
1389
|
instructions: options?.instructions
|
|
1282
1390
|
});
|
|
1283
1391
|
debugLog(resolvedConfig, "compare prompt", prompt, "prompt");
|
|
1284
|
-
const response = await timedSendMessage(driver, [imgA, imgB], prompt);
|
|
1392
|
+
const response = await timedSendMessage(driver, [imgA, imgB], prompt, compareSchemaOptions);
|
|
1285
1393
|
debugLog(resolvedConfig, "compare response", response.text, "response");
|
|
1286
1394
|
const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
|
|
1287
1395
|
const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
|
|
@@ -1316,7 +1424,7 @@ function visualAI(config = {}) {
|
|
|
1316
1424
|
const img = await normalizeImage(image);
|
|
1317
1425
|
const prompt = buildAccessibilityPrompt(options);
|
|
1318
1426
|
debugLog(resolvedConfig, "accessibility prompt", prompt, "prompt");
|
|
1319
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1427
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1320
1428
|
debugLog(resolvedConfig, "accessibility response", response.text, "response");
|
|
1321
1429
|
const result = parseCheckResponse(response.text);
|
|
1322
1430
|
return {
|
|
@@ -1335,7 +1443,7 @@ function visualAI(config = {}) {
|
|
|
1335
1443
|
const img = await normalizeImage(image);
|
|
1336
1444
|
const prompt = buildLayoutPrompt(options);
|
|
1337
1445
|
debugLog(resolvedConfig, "layout prompt", prompt, "prompt");
|
|
1338
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1446
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1339
1447
|
debugLog(resolvedConfig, "layout response", response.text, "response");
|
|
1340
1448
|
const result = parseCheckResponse(response.text);
|
|
1341
1449
|
return {
|
|
@@ -1349,7 +1457,7 @@ function visualAI(config = {}) {
|
|
|
1349
1457
|
const img = await normalizeImage(image);
|
|
1350
1458
|
const prompt = buildPageLoadPrompt(options);
|
|
1351
1459
|
debugLog(resolvedConfig, "pageLoad prompt", prompt, "prompt");
|
|
1352
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1460
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1353
1461
|
debugLog(resolvedConfig, "pageLoad response", response.text, "response");
|
|
1354
1462
|
const result = parseCheckResponse(response.text);
|
|
1355
1463
|
return {
|
|
@@ -1363,7 +1471,7 @@ function visualAI(config = {}) {
|
|
|
1363
1471
|
const img = await normalizeImage(image);
|
|
1364
1472
|
const prompt = buildContentPrompt(options);
|
|
1365
1473
|
debugLog(resolvedConfig, "content prompt", prompt, "prompt");
|
|
1366
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1474
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1367
1475
|
debugLog(resolvedConfig, "content response", response.text, "response");
|
|
1368
1476
|
const result = parseCheckResponse(response.text);
|
|
1369
1477
|
return {
|
|
@@ -1445,6 +1553,7 @@ function assertVisualCompareResult(result, label) {
|
|
|
1445
1553
|
Layout,
|
|
1446
1554
|
Model,
|
|
1447
1555
|
Provider,
|
|
1556
|
+
ReasoningEffort,
|
|
1448
1557
|
StatementResultSchema,
|
|
1449
1558
|
UsageInfoSchema,
|
|
1450
1559
|
VisualAIAssertionError,
|
|
@@ -1455,6 +1564,7 @@ function assertVisualCompareResult(result, label) {
|
|
|
1455
1564
|
VisualAIProviderError,
|
|
1456
1565
|
VisualAIRateLimitError,
|
|
1457
1566
|
VisualAIResponseParseError,
|
|
1567
|
+
VisualAITruncationError,
|
|
1458
1568
|
assertVisualCompareResult,
|
|
1459
1569
|
assertVisualResult,
|
|
1460
1570
|
formatCheckResult,
|