visual-ai-assertions 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -16
- package/dist/index.cjs +293 -127
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +48 -3
- package/dist/index.d.ts +48 -3
- package/dist/index.js +292 -127
- package/dist/index.js.map +1 -1
- package/package.json +3 -2
package/dist/index.cjs
CHANGED
|
@@ -54,6 +54,7 @@ __export(index_exports, {
|
|
|
54
54
|
VisualAIProviderError: () => VisualAIProviderError,
|
|
55
55
|
VisualAIRateLimitError: () => VisualAIRateLimitError,
|
|
56
56
|
VisualAIResponseParseError: () => VisualAIResponseParseError,
|
|
57
|
+
VisualAITruncationError: () => VisualAITruncationError,
|
|
57
58
|
assertVisualCompareResult: () => assertVisualCompareResult,
|
|
58
59
|
assertVisualResult: () => assertVisualResult,
|
|
59
60
|
formatCheckResult: () => formatCheckResult,
|
|
@@ -85,6 +86,7 @@ var Model = {
|
|
|
85
86
|
},
|
|
86
87
|
Google: {
|
|
87
88
|
GEMINI_3_1_PRO_PREVIEW: "gemini-3.1-pro-preview",
|
|
89
|
+
GEMINI_3_1_FLASH_LITE_PREVIEW: "gemini-3.1-flash-lite-preview",
|
|
88
90
|
GEMINI_3_FLASH_PREVIEW: "gemini-3-flash-preview"
|
|
89
91
|
}
|
|
90
92
|
};
|
|
@@ -94,12 +96,18 @@ var DEFAULT_MODELS = {
|
|
|
94
96
|
[Provider.GOOGLE]: Model.Google.GEMINI_3_FLASH_PREVIEW
|
|
95
97
|
};
|
|
96
98
|
var DEFAULT_MAX_TOKENS = 4096;
|
|
99
|
+
var OPENAI_REASONING_MAX_TOKENS = 16384;
|
|
97
100
|
var MODEL_TO_PROVIDER = new Map([
|
|
98
101
|
...Object.values(Model.Anthropic).map((m) => [m, Provider.ANTHROPIC]),
|
|
99
102
|
...Object.values(Model.OpenAI).map((m) => [m, Provider.OPENAI]),
|
|
100
103
|
...Object.values(Model.Google).map((m) => [m, Provider.GOOGLE])
|
|
101
104
|
]);
|
|
102
105
|
var VALID_PROVIDERS = Object.values(Provider);
|
|
106
|
+
var PROVIDER_DEFAULT_REASONING = {
|
|
107
|
+
openai: "medium",
|
|
108
|
+
anthropic: "off",
|
|
109
|
+
google: "off"
|
|
110
|
+
};
|
|
103
111
|
var Content = {
|
|
104
112
|
/** Detects Lorem ipsum, TODO, TBD, and similar placeholder text */
|
|
105
113
|
PLACEHOLDER_TEXT: "placeholder-text",
|
|
@@ -173,6 +181,16 @@ var VisualAIResponseParseError = class extends VisualAIError {
|
|
|
173
181
|
this.rawResponse = rawResponse;
|
|
174
182
|
}
|
|
175
183
|
};
|
|
184
|
+
var VisualAITruncationError = class extends VisualAIError {
|
|
185
|
+
partialResponse;
|
|
186
|
+
maxTokens;
|
|
187
|
+
constructor(message, partialResponse, maxTokens) {
|
|
188
|
+
super(message, "RESPONSE_TRUNCATED");
|
|
189
|
+
this.name = "VisualAITruncationError";
|
|
190
|
+
this.partialResponse = partialResponse;
|
|
191
|
+
this.maxTokens = maxTokens;
|
|
192
|
+
}
|
|
193
|
+
};
|
|
176
194
|
var VisualAIConfigError = class extends VisualAIError {
|
|
177
195
|
constructor(message) {
|
|
178
196
|
super(message, "CONFIG_INVALID");
|
|
@@ -188,7 +206,7 @@ var VisualAIAssertionError = class extends VisualAIError {
|
|
|
188
206
|
}
|
|
189
207
|
};
|
|
190
208
|
function isVisualAIKnownError(error) {
|
|
191
|
-
return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
|
|
209
|
+
return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAITruncationError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
|
|
192
210
|
}
|
|
193
211
|
|
|
194
212
|
// src/core/prompt.ts
|
|
@@ -202,12 +220,18 @@ Each issue must have:
|
|
|
202
220
|
- "description": what the issue is
|
|
203
221
|
- "suggestion": how to fix or improve it
|
|
204
222
|
`;
|
|
205
|
-
var CHECK_OUTPUT_SCHEMA = `
|
|
223
|
+
var CHECK_OUTPUT_SCHEMA = `IMPORTANT: Follow this evaluation order:
|
|
224
|
+
1. First, evaluate EACH statement independently and populate the "statements" array
|
|
225
|
+
2. Then, set "pass" to true ONLY if every statement passed (logical AND of all statement results)
|
|
226
|
+
3. Write "reasoning" as a brief overall summary of the evaluation
|
|
227
|
+
4. Include "issues" only for statements that failed
|
|
228
|
+
|
|
229
|
+
Respond with a JSON object matching this exact structure:
|
|
206
230
|
{
|
|
207
|
-
"pass": boolean, // true ONLY if ALL statements
|
|
208
|
-
"reasoning": string, // brief overall summary
|
|
209
|
-
"issues": [...], //
|
|
210
|
-
"statements": [ // one entry per statement, in order
|
|
231
|
+
"pass": boolean, // true ONLY if ALL statements passed \u2014 derive from statements array
|
|
232
|
+
"reasoning": string, // brief overall summary of the evaluation
|
|
233
|
+
"issues": [...], // one issue per failing statement (empty if all pass)
|
|
234
|
+
"statements": [ // one entry per statement, in order \u2014 evaluate these FIRST
|
|
211
235
|
{
|
|
212
236
|
"statement": string, // the original statement text
|
|
213
237
|
"pass": boolean, // whether this statement is true
|
|
@@ -226,7 +250,7 @@ Only include issues for statements that fail. If all statements pass, issues sho
|
|
|
226
250
|
Example for a failing check:
|
|
227
251
|
{
|
|
228
252
|
"pass": false,
|
|
229
|
-
"reasoning": "
|
|
253
|
+
"reasoning": "The submit button is not visible on the page.",
|
|
230
254
|
"issues": [
|
|
231
255
|
{ "priority": "major", "category": "missing-element", "description": "Submit button is not visible on the page", "suggestion": "Verify the submit button component is rendered and not hidden by CSS" }
|
|
232
256
|
],
|
|
@@ -486,7 +510,7 @@ var AnthropicDriver = class {
|
|
|
486
510
|
this.client = new Anthropic({ apiKey });
|
|
487
511
|
return this.client;
|
|
488
512
|
}
|
|
489
|
-
async sendMessage(images, prompt) {
|
|
513
|
+
async sendMessage(images, prompt, _options) {
|
|
490
514
|
const client = await this.getClient();
|
|
491
515
|
const imageBlocks = images.map((img) => ({
|
|
492
516
|
type: "image",
|
|
@@ -516,6 +540,13 @@ var AnthropicDriver = class {
|
|
|
516
540
|
const message = await client.messages.create(requestParams);
|
|
517
541
|
const textBlock = message.content.find((block) => block.type === "text");
|
|
518
542
|
const text = textBlock?.text ?? "";
|
|
543
|
+
if (message.stop_reason === "max_tokens") {
|
|
544
|
+
throw new VisualAITruncationError(
|
|
545
|
+
`Response truncated: Anthropic stopped due to max_tokens limit (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
|
|
546
|
+
text,
|
|
547
|
+
this.maxTokens
|
|
548
|
+
);
|
|
549
|
+
}
|
|
519
550
|
return {
|
|
520
551
|
text,
|
|
521
552
|
usage: {
|
|
@@ -524,6 +555,7 @@ var AnthropicDriver = class {
|
|
|
524
555
|
}
|
|
525
556
|
};
|
|
526
557
|
} catch (err) {
|
|
558
|
+
if (err instanceof VisualAITruncationError) throw err;
|
|
527
559
|
throw mapProviderError(err);
|
|
528
560
|
}
|
|
529
561
|
}
|
|
@@ -535,11 +567,11 @@ function needsCodeExecution(model) {
|
|
|
535
567
|
const match = model.match(/^gemini-(\d+)/);
|
|
536
568
|
return match !== null && match[1] !== void 0 && parseInt(match[1], 10) >= 3;
|
|
537
569
|
}
|
|
538
|
-
var
|
|
539
|
-
low:
|
|
540
|
-
medium:
|
|
541
|
-
high:
|
|
542
|
-
xhigh:
|
|
570
|
+
var GOOGLE_THINKING_LEVEL = {
|
|
571
|
+
low: "minimal",
|
|
572
|
+
medium: "low",
|
|
573
|
+
high: "medium",
|
|
574
|
+
xhigh: "high"
|
|
543
575
|
};
|
|
544
576
|
var GoogleDriver = class {
|
|
545
577
|
client;
|
|
@@ -579,7 +611,7 @@ var GoogleDriver = class {
|
|
|
579
611
|
this.client = new GoogleGenAI({ apiKey });
|
|
580
612
|
return this.client;
|
|
581
613
|
}
|
|
582
|
-
async sendMessage(images, prompt) {
|
|
614
|
+
async sendMessage(images, prompt, _options) {
|
|
583
615
|
const client = await this.getClient();
|
|
584
616
|
try {
|
|
585
617
|
const response = await client.models.generateContent({
|
|
@@ -590,20 +622,36 @@ var GoogleDriver = class {
|
|
|
590
622
|
maxOutputTokens: this.maxTokens,
|
|
591
623
|
...this.reasoningEffort && {
|
|
592
624
|
thinkingConfig: {
|
|
593
|
-
|
|
625
|
+
thinkingLevel: GOOGLE_THINKING_LEVEL[this.reasoningEffort]
|
|
594
626
|
}
|
|
595
627
|
}
|
|
596
628
|
}
|
|
597
629
|
});
|
|
630
|
+
const finishReason = response.candidates?.[0]?.finishReason;
|
|
631
|
+
if (finishReason === "MAX_TOKENS") {
|
|
632
|
+
throw new VisualAITruncationError(
|
|
633
|
+
`Response truncated: Google returned finishReason "MAX_TOKENS". The model exhausted the output token budget (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
|
|
634
|
+
response.text ?? "",
|
|
635
|
+
this.maxTokens
|
|
636
|
+
);
|
|
637
|
+
}
|
|
638
|
+
if (finishReason && finishReason !== "STOP") {
|
|
639
|
+
throw new VisualAIProviderError(
|
|
640
|
+
`Response blocked: Google returned finishReason "${finishReason}".`
|
|
641
|
+
);
|
|
642
|
+
}
|
|
598
643
|
const text = response.text ?? "";
|
|
644
|
+
const thoughtsTokenCount = response.usageMetadata?.thoughtsTokenCount;
|
|
599
645
|
return {
|
|
600
646
|
text,
|
|
601
647
|
usage: response.usageMetadata ? {
|
|
602
648
|
inputTokens: response.usageMetadata.promptTokenCount ?? 0,
|
|
603
|
-
outputTokens: response.usageMetadata.candidatesTokenCount ?? 0
|
|
649
|
+
outputTokens: response.usageMetadata.candidatesTokenCount ?? 0,
|
|
650
|
+
...thoughtsTokenCount !== void 0 && { reasoningTokens: thoughtsTokenCount }
|
|
604
651
|
} : void 0
|
|
605
652
|
};
|
|
606
653
|
} catch (err) {
|
|
654
|
+
if (err instanceof VisualAITruncationError || err instanceof VisualAIProviderError) throw err;
|
|
607
655
|
throw mapProviderError(err);
|
|
608
656
|
}
|
|
609
657
|
}
|
|
@@ -675,17 +723,25 @@ var OpenAIDriver = class {
|
|
|
675
723
|
this.client = new OpenAI({ apiKey });
|
|
676
724
|
return this.client;
|
|
677
725
|
}
|
|
678
|
-
async sendMessage(images, prompt) {
|
|
726
|
+
async sendMessage(images, prompt, options) {
|
|
679
727
|
const client = await this.getClient();
|
|
680
728
|
const imageBlocks = images.map((img) => ({
|
|
681
729
|
type: "input_image",
|
|
682
730
|
image_url: `data:${img.mimeType};base64,${img.base64}`
|
|
683
731
|
}));
|
|
684
732
|
try {
|
|
733
|
+
const format = options?.responseSchema ? {
|
|
734
|
+
type: "json_schema",
|
|
735
|
+
json_schema: {
|
|
736
|
+
name: "visual_ai_response",
|
|
737
|
+
strict: true,
|
|
738
|
+
schema: options.responseSchema
|
|
739
|
+
}
|
|
740
|
+
} : { type: "json_object" };
|
|
685
741
|
const requestParams = {
|
|
686
742
|
model: this.model,
|
|
687
743
|
max_output_tokens: this.maxTokens,
|
|
688
|
-
text: { format
|
|
744
|
+
text: { format },
|
|
689
745
|
input: [
|
|
690
746
|
{
|
|
691
747
|
role: "user",
|
|
@@ -697,15 +753,26 @@ var OpenAIDriver = class {
|
|
|
697
753
|
requestParams.reasoning = { effort: this.reasoningEffort };
|
|
698
754
|
}
|
|
699
755
|
const response = await client.responses.create(requestParams);
|
|
756
|
+
if (response.status && response.status !== "completed") {
|
|
757
|
+
const detail = response.incomplete_details?.reason ? ` (${response.incomplete_details.reason})` : "";
|
|
758
|
+
throw new VisualAITruncationError(
|
|
759
|
+
`Response truncated: OpenAI returned status "${response.status}"${detail}. The model exhausted the output token budget (${this.maxTokens} tokens). This commonly happens with higher reasoning effort levels. Increase maxTokens in your config (e.g., maxTokens: 16384) or lower reasoningEffort.`,
|
|
760
|
+
response.output_text ?? "",
|
|
761
|
+
this.maxTokens
|
|
762
|
+
);
|
|
763
|
+
}
|
|
700
764
|
const text = response.output_text ?? "";
|
|
765
|
+
const reasoningTokens = response.usage?.output_tokens_details?.reasoning_tokens;
|
|
701
766
|
return {
|
|
702
767
|
text,
|
|
703
768
|
usage: response.usage ? {
|
|
704
769
|
inputTokens: response.usage.input_tokens,
|
|
705
|
-
outputTokens: response.usage.output_tokens
|
|
770
|
+
outputTokens: response.usage.output_tokens,
|
|
771
|
+
...reasoningTokens !== void 0 && { reasoningTokens }
|
|
706
772
|
} : void 0
|
|
707
773
|
};
|
|
708
774
|
} catch (err) {
|
|
775
|
+
if (err instanceof VisualAITruncationError) throw err;
|
|
709
776
|
throw mapProviderError(err);
|
|
710
777
|
}
|
|
711
778
|
}
|
|
@@ -752,16 +819,40 @@ function parseBooleanEnv(envName, value) {
|
|
|
752
819
|
`Invalid ${envName} value: "${value}". Use "true", "1", "false", or "0".`
|
|
753
820
|
);
|
|
754
821
|
}
|
|
822
|
+
var debugDeprecationWarned = false;
|
|
755
823
|
function resolveConfig(config) {
|
|
756
824
|
const provider = resolveProvider(config);
|
|
757
825
|
const model = config.model ?? process.env.VISUAL_AI_MODEL ?? DEFAULT_MODELS[provider];
|
|
826
|
+
const debug = config.debug ?? parseBooleanEnv("VISUAL_AI_DEBUG", process.env.VISUAL_AI_DEBUG) ?? false;
|
|
827
|
+
const debugPrompt = config.debugPrompt ?? parseBooleanEnv("VISUAL_AI_DEBUG_PROMPT", process.env.VISUAL_AI_DEBUG_PROMPT) ?? false;
|
|
828
|
+
const debugResponse = config.debugResponse ?? parseBooleanEnv("VISUAL_AI_DEBUG_RESPONSE", process.env.VISUAL_AI_DEBUG_RESPONSE) ?? false;
|
|
829
|
+
if (debug && !debugPrompt && !debugResponse && !debugDeprecationWarned) {
|
|
830
|
+
debugDeprecationWarned = true;
|
|
831
|
+
process.stderr.write(
|
|
832
|
+
`[visual-ai-assertions] Warning: VISUAL_AI_DEBUG no longer enables prompt/response logging. Use VISUAL_AI_DEBUG_PROMPT=true and/or VISUAL_AI_DEBUG_RESPONSE=true instead.
|
|
833
|
+
`
|
|
834
|
+
);
|
|
835
|
+
}
|
|
836
|
+
const userSetMaxTokens = config.maxTokens !== void 0;
|
|
837
|
+
let maxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
|
|
838
|
+
if (!userSetMaxTokens && provider === "openai" && (config.reasoningEffort === "high" || config.reasoningEffort === "xhigh")) {
|
|
839
|
+
maxTokens = OPENAI_REASONING_MAX_TOKENS;
|
|
840
|
+
if (debug) {
|
|
841
|
+
process.stderr.write(
|
|
842
|
+
`[visual-ai-assertions] Auto-increased maxTokens from ${DEFAULT_MAX_TOKENS} to ${OPENAI_REASONING_MAX_TOKENS} for OpenAI with reasoningEffort "${config.reasoningEffort}".
|
|
843
|
+
`
|
|
844
|
+
);
|
|
845
|
+
}
|
|
846
|
+
}
|
|
758
847
|
return {
|
|
759
848
|
provider,
|
|
760
849
|
apiKey: config.apiKey,
|
|
761
850
|
model,
|
|
762
|
-
maxTokens
|
|
851
|
+
maxTokens,
|
|
763
852
|
reasoningEffort: config.reasoningEffort,
|
|
764
|
-
debug
|
|
853
|
+
debug,
|
|
854
|
+
debugPrompt,
|
|
855
|
+
debugResponse,
|
|
765
856
|
trackUsage: config.trackUsage ?? parseBooleanEnv("VISUAL_AI_TRACK_USAGE", process.env.VISUAL_AI_TRACK_USAGE) ?? false
|
|
766
857
|
};
|
|
767
858
|
}
|
|
@@ -809,6 +900,10 @@ var PRICING_TABLE = {
|
|
|
809
900
|
inputPricePerToken: 2 / PER_MILLION,
|
|
810
901
|
outputPricePerToken: 12 / PER_MILLION
|
|
811
902
|
},
|
|
903
|
+
[`${Provider.GOOGLE}:${Model.Google.GEMINI_3_1_FLASH_LITE_PREVIEW}`]: {
|
|
904
|
+
inputPricePerToken: 0.25 / PER_MILLION,
|
|
905
|
+
outputPricePerToken: 1.5 / PER_MILLION
|
|
906
|
+
},
|
|
812
907
|
[`${Provider.GOOGLE}:${Model.Google.GEMINI_3_FLASH_PREVIEW}`]: {
|
|
813
908
|
inputPricePerToken: 0.5 / PER_MILLION,
|
|
814
909
|
outputPricePerToken: 3 / PER_MILLION
|
|
@@ -822,8 +917,9 @@ function calculateCost(provider, model, inputTokens, outputTokens) {
|
|
|
822
917
|
}
|
|
823
918
|
|
|
824
919
|
// src/core/debug.ts
|
|
825
|
-
function debugLog(config, label, data) {
|
|
826
|
-
|
|
920
|
+
function debugLog(config, label, data, kind = "error") {
|
|
921
|
+
const enabled = kind === "prompt" ? config.debugPrompt : kind === "response" ? config.debugResponse : config.debug;
|
|
922
|
+
if (enabled) {
|
|
827
923
|
process.stderr.write(`[visual-ai-assertions] ${label}: ${data}
|
|
828
924
|
`);
|
|
829
925
|
}
|
|
@@ -831,8 +927,10 @@ function debugLog(config, label, data) {
|
|
|
831
927
|
function usageLog(config, method, usage) {
|
|
832
928
|
if (!config.trackUsage) return;
|
|
833
929
|
const costStr = usage.estimatedCost !== void 0 ? `$${usage.estimatedCost.toFixed(6)}` : "unknown";
|
|
930
|
+
const reasoningStr = config.reasoningEffort ? `reasoning: ${config.reasoningEffort}` : `reasoning: ${PROVIDER_DEFAULT_REASONING[config.provider]} (provider default)`;
|
|
931
|
+
const reasoningTokenStr = usage.reasoningTokens !== void 0 ? ` (${usage.reasoningTokens} reasoning)` : "";
|
|
834
932
|
process.stderr.write(
|
|
835
|
-
`[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}]
|
|
933
|
+
`[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output${reasoningTokenStr} tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
|
|
836
934
|
`
|
|
837
935
|
);
|
|
838
936
|
}
|
|
@@ -842,15 +940,42 @@ function processUsage(method, rawUsage, durationSeconds, config) {
|
|
|
842
940
|
const usage = {
|
|
843
941
|
inputTokens,
|
|
844
942
|
outputTokens,
|
|
943
|
+
...rawUsage?.reasoningTokens !== void 0 && { reasoningTokens: rawUsage.reasoningTokens },
|
|
845
944
|
estimatedCost: calculateCost(config.provider, config.model, inputTokens, outputTokens),
|
|
846
945
|
durationSeconds
|
|
847
946
|
};
|
|
848
947
|
usageLog(config, method, usage);
|
|
849
948
|
return usage;
|
|
850
949
|
}
|
|
851
|
-
|
|
950
|
+
var MAX_RAW_RESPONSE_PREVIEW = 500;
|
|
951
|
+
function formatError(error) {
|
|
952
|
+
if (error instanceof VisualAITruncationError) {
|
|
953
|
+
const preview = error.partialResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.partialResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.partialResponse;
|
|
954
|
+
return `${error.name} (${error.code}): ${error.message}. Partial response: ${preview}`;
|
|
955
|
+
}
|
|
956
|
+
if (error instanceof VisualAIResponseParseError) {
|
|
957
|
+
const truncated = error.rawResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.rawResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.rawResponse;
|
|
958
|
+
return `${error.name} (${error.code}): ${error.message}. Raw (truncated): ${truncated}`;
|
|
959
|
+
}
|
|
960
|
+
if (error instanceof VisualAIError) {
|
|
961
|
+
return `${error.name} (${error.code}): ${error.message}`;
|
|
962
|
+
}
|
|
963
|
+
if (error instanceof Error) {
|
|
964
|
+
return `${error.name}: ${error.message}`;
|
|
965
|
+
}
|
|
966
|
+
return String(error);
|
|
967
|
+
}
|
|
968
|
+
async function withErrorDebug(config, method, fn) {
|
|
969
|
+
try {
|
|
970
|
+
return await fn();
|
|
971
|
+
} catch (error) {
|
|
972
|
+
debugLog(config, `${method} error`, formatError(error), "error");
|
|
973
|
+
throw error;
|
|
974
|
+
}
|
|
975
|
+
}
|
|
976
|
+
async function timedSendMessage(driver, images, prompt, options) {
|
|
852
977
|
const start = performance.now();
|
|
853
|
-
const response = await driver.sendMessage(images, prompt);
|
|
978
|
+
const response = await driver.sendMessage(images, prompt, options);
|
|
854
979
|
const durationSeconds = (performance.now() - start) / 1e3;
|
|
855
980
|
return { ...response, durationSeconds };
|
|
856
981
|
}
|
|
@@ -1090,6 +1215,8 @@ var StatementResultSchema = import_zod.z.object({
|
|
|
1090
1215
|
var UsageInfoSchema = import_zod.z.object({
|
|
1091
1216
|
inputTokens: import_zod.z.number(),
|
|
1092
1217
|
outputTokens: import_zod.z.number(),
|
|
1218
|
+
/** Reasoning/thinking tokens consumed by the model (informational, typically included within outputTokens). */
|
|
1219
|
+
reasoningTokens: import_zod.z.number().optional(),
|
|
1093
1220
|
estimatedCost: import_zod.z.number().optional(),
|
|
1094
1221
|
durationSeconds: import_zod.z.number().nonnegative().optional()
|
|
1095
1222
|
});
|
|
@@ -1143,8 +1270,24 @@ function parseResponse(raw, schema) {
|
|
|
1143
1270
|
}
|
|
1144
1271
|
return result.data;
|
|
1145
1272
|
}
|
|
1273
|
+
function reconcileCheckResult(result) {
|
|
1274
|
+
if (result.statements.length === 0) {
|
|
1275
|
+
return result;
|
|
1276
|
+
}
|
|
1277
|
+
const passCount = result.statements.filter((s) => s.pass).length;
|
|
1278
|
+
const total = result.statements.length;
|
|
1279
|
+
const computedPass = passCount === total;
|
|
1280
|
+
const countPrefix = `${passCount} of ${total} checks passed`;
|
|
1281
|
+
const reasoning = `${countPrefix}. ${result.reasoning}`;
|
|
1282
|
+
return {
|
|
1283
|
+
...result,
|
|
1284
|
+
pass: computedPass,
|
|
1285
|
+
reasoning
|
|
1286
|
+
};
|
|
1287
|
+
}
|
|
1146
1288
|
function parseCheckResponse(raw) {
|
|
1147
|
-
|
|
1289
|
+
const result = parseResponse(raw, CheckResponseSchema);
|
|
1290
|
+
return reconcileCheckResult(result);
|
|
1148
1291
|
}
|
|
1149
1292
|
function parseAskResponse(raw) {
|
|
1150
1293
|
return parseResponse(raw, AskResponseSchema);
|
|
@@ -1154,6 +1297,12 @@ function parseCompareResponse(raw) {
|
|
|
1154
1297
|
}
|
|
1155
1298
|
|
|
1156
1299
|
// src/core/client.ts
|
|
1300
|
+
var import_zod_to_json_schema = require("zod-to-json-schema");
|
|
1301
|
+
function toSchemaOptions(schema) {
|
|
1302
|
+
return {
|
|
1303
|
+
responseSchema: (0, import_zod_to_json_schema.zodToJsonSchema)(schema, { target: "openAi" })
|
|
1304
|
+
};
|
|
1305
|
+
}
|
|
1157
1306
|
var PROVIDER_REGISTRY = {
|
|
1158
1307
|
anthropic: (config) => new AnthropicDriver(config),
|
|
1159
1308
|
openai: (config) => new OpenAIDriver(config),
|
|
@@ -1162,6 +1311,9 @@ var PROVIDER_REGISTRY = {
|
|
|
1162
1311
|
function createDriver(provider, config) {
|
|
1163
1312
|
return PROVIDER_REGISTRY[provider](config);
|
|
1164
1313
|
}
|
|
1314
|
+
var checkSchemaOptions = toSchemaOptions(CheckResponseSchema);
|
|
1315
|
+
var askSchemaOptions = toSchemaOptions(AskResponseSchema);
|
|
1316
|
+
var compareSchemaOptions = toSchemaOptions(CompareResponseSchema);
|
|
1165
1317
|
function visualAI(config = {}) {
|
|
1166
1318
|
const resolvedConfig = resolveConfig(config);
|
|
1167
1319
|
const driverConfig = {
|
|
@@ -1176,16 +1328,18 @@ function visualAI(config = {}) {
|
|
|
1176
1328
|
if (elements.length === 0) {
|
|
1177
1329
|
throw new VisualAIConfigError(`At least one element is required for ${methodName}()`);
|
|
1178
1330
|
}
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1331
|
+
return withErrorDebug(resolvedConfig, methodName, async () => {
|
|
1332
|
+
const img = await normalizeImage(image);
|
|
1333
|
+
const prompt = buildElementsVisibilityPrompt(elements, visible, options);
|
|
1334
|
+
debugLog(resolvedConfig, `${methodName} prompt`, prompt, "prompt");
|
|
1335
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1336
|
+
debugLog(resolvedConfig, `${methodName} response`, response.text, "response");
|
|
1337
|
+
const result = parseCheckResponse(response.text);
|
|
1338
|
+
return {
|
|
1339
|
+
...result,
|
|
1340
|
+
usage: processUsage(methodName, response.usage, response.durationSeconds, resolvedConfig)
|
|
1341
|
+
};
|
|
1342
|
+
});
|
|
1189
1343
|
}
|
|
1190
1344
|
return {
|
|
1191
1345
|
async check(image, statements, options) {
|
|
@@ -1193,61 +1347,64 @@ function visualAI(config = {}) {
|
|
|
1193
1347
|
if (stmts.length === 0) {
|
|
1194
1348
|
throw new VisualAIConfigError("At least one statement is required for check()");
|
|
1195
1349
|
}
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1350
|
+
return withErrorDebug(resolvedConfig, "check", async () => {
|
|
1351
|
+
const img = await normalizeImage(image);
|
|
1352
|
+
const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
|
|
1353
|
+
debugLog(resolvedConfig, "check prompt", prompt, "prompt");
|
|
1354
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1355
|
+
debugLog(resolvedConfig, "check response", response.text, "response");
|
|
1356
|
+
const result = parseCheckResponse(response.text);
|
|
1357
|
+
return {
|
|
1358
|
+
...result,
|
|
1359
|
+
usage: processUsage("check", response.usage, response.durationSeconds, resolvedConfig)
|
|
1360
|
+
};
|
|
1361
|
+
});
|
|
1206
1362
|
},
|
|
1207
1363
|
async ask(image, userPrompt, options) {
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1364
|
+
return withErrorDebug(resolvedConfig, "ask", async () => {
|
|
1365
|
+
const img = await normalizeImage(image);
|
|
1366
|
+
const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
|
|
1367
|
+
debugLog(resolvedConfig, "ask prompt", prompt, "prompt");
|
|
1368
|
+
const response = await timedSendMessage(driver, [img], prompt, askSchemaOptions);
|
|
1369
|
+
debugLog(resolvedConfig, "ask response", response.text, "response");
|
|
1370
|
+
const result = parseAskResponse(response.text);
|
|
1371
|
+
return {
|
|
1372
|
+
...result,
|
|
1373
|
+
usage: processUsage("ask", response.usage, response.durationSeconds, resolvedConfig)
|
|
1374
|
+
};
|
|
1375
|
+
});
|
|
1218
1376
|
},
|
|
1219
1377
|
async compare(imageA, imageB, options) {
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
if (!resolvedConfig.debug) {
|
|
1378
|
+
return withErrorDebug(resolvedConfig, "compare", async () => {
|
|
1379
|
+
const [imgA, imgB] = await Promise.all([normalizeImage(imageA), normalizeImage(imageB)]);
|
|
1380
|
+
const prompt = buildComparePrompt({
|
|
1381
|
+
userPrompt: options?.prompt,
|
|
1382
|
+
instructions: options?.instructions
|
|
1383
|
+
});
|
|
1384
|
+
debugLog(resolvedConfig, "compare prompt", prompt, "prompt");
|
|
1385
|
+
const response = await timedSendMessage(driver, [imgA, imgB], prompt, compareSchemaOptions);
|
|
1386
|
+
debugLog(resolvedConfig, "compare response", response.text, "response");
|
|
1387
|
+
const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
|
|
1388
|
+
const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
|
|
1389
|
+
let diffImage;
|
|
1390
|
+
if (effectiveDiffImage) {
|
|
1391
|
+
try {
|
|
1392
|
+
diffImage = await generateAiDiff(imgA, imgB, resolvedConfig.model, driver);
|
|
1393
|
+
} catch (err) {
|
|
1394
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
1238
1395
|
process.stderr.write(
|
|
1239
1396
|
`[visual-ai-assertions] warning: diff generation failed: ${msg}
|
|
1240
1397
|
`
|
|
1241
1398
|
);
|
|
1242
1399
|
}
|
|
1243
1400
|
}
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
};
|
|
1401
|
+
const result = parseCompareResponse(response.text);
|
|
1402
|
+
return {
|
|
1403
|
+
...result,
|
|
1404
|
+
...diffImage ? { diffImage } : {},
|
|
1405
|
+
usage: processUsage("compare", response.usage, response.durationSeconds, resolvedConfig)
|
|
1406
|
+
};
|
|
1407
|
+
});
|
|
1251
1408
|
},
|
|
1252
1409
|
elementsVisible(image, elements, options) {
|
|
1253
1410
|
return checkElementsVisibility(image, elements, true, options);
|
|
@@ -1256,57 +1413,65 @@ function visualAI(config = {}) {
|
|
|
1256
1413
|
return checkElementsVisibility(image, elements, false, options);
|
|
1257
1414
|
},
|
|
1258
1415
|
async accessibility(image, options) {
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1416
|
+
return withErrorDebug(resolvedConfig, "accessibility", async () => {
|
|
1417
|
+
const img = await normalizeImage(image);
|
|
1418
|
+
const prompt = buildAccessibilityPrompt(options);
|
|
1419
|
+
debugLog(resolvedConfig, "accessibility prompt", prompt, "prompt");
|
|
1420
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1421
|
+
debugLog(resolvedConfig, "accessibility response", response.text, "response");
|
|
1422
|
+
const result = parseCheckResponse(response.text);
|
|
1423
|
+
return {
|
|
1424
|
+
...result,
|
|
1425
|
+
usage: processUsage(
|
|
1426
|
+
"accessibility",
|
|
1427
|
+
response.usage,
|
|
1428
|
+
response.durationSeconds,
|
|
1429
|
+
resolvedConfig
|
|
1430
|
+
)
|
|
1431
|
+
};
|
|
1432
|
+
});
|
|
1274
1433
|
},
|
|
1275
1434
|
async layout(image, options) {
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1435
|
+
return withErrorDebug(resolvedConfig, "layout", async () => {
|
|
1436
|
+
const img = await normalizeImage(image);
|
|
1437
|
+
const prompt = buildLayoutPrompt(options);
|
|
1438
|
+
debugLog(resolvedConfig, "layout prompt", prompt, "prompt");
|
|
1439
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1440
|
+
debugLog(resolvedConfig, "layout response", response.text, "response");
|
|
1441
|
+
const result = parseCheckResponse(response.text);
|
|
1442
|
+
return {
|
|
1443
|
+
...result,
|
|
1444
|
+
usage: processUsage("layout", response.usage, response.durationSeconds, resolvedConfig)
|
|
1445
|
+
};
|
|
1446
|
+
});
|
|
1286
1447
|
},
|
|
1287
1448
|
async pageLoad(image, options) {
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1449
|
+
return withErrorDebug(resolvedConfig, "pageLoad", async () => {
|
|
1450
|
+
const img = await normalizeImage(image);
|
|
1451
|
+
const prompt = buildPageLoadPrompt(options);
|
|
1452
|
+
debugLog(resolvedConfig, "pageLoad prompt", prompt, "prompt");
|
|
1453
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1454
|
+
debugLog(resolvedConfig, "pageLoad response", response.text, "response");
|
|
1455
|
+
const result = parseCheckResponse(response.text);
|
|
1456
|
+
return {
|
|
1457
|
+
...result,
|
|
1458
|
+
usage: processUsage("pageLoad", response.usage, response.durationSeconds, resolvedConfig)
|
|
1459
|
+
};
|
|
1460
|
+
});
|
|
1298
1461
|
},
|
|
1299
1462
|
async content(image, options) {
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1463
|
+
return withErrorDebug(resolvedConfig, "content", async () => {
|
|
1464
|
+
const img = await normalizeImage(image);
|
|
1465
|
+
const prompt = buildContentPrompt(options);
|
|
1466
|
+
debugLog(resolvedConfig, "content prompt", prompt, "prompt");
|
|
1467
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1468
|
+
debugLog(resolvedConfig, "content response", response.text, "response");
|
|
1469
|
+
const result = parseCheckResponse(response.text);
|
|
1470
|
+
return {
|
|
1471
|
+
...result,
|
|
1472
|
+
usage: processUsage("content", response.usage, response.durationSeconds, resolvedConfig)
|
|
1473
|
+
};
|
|
1474
|
+
});
|
|
1310
1475
|
}
|
|
1311
1476
|
};
|
|
1312
1477
|
}
|
|
@@ -1391,6 +1556,7 @@ function assertVisualCompareResult(result, label) {
|
|
|
1391
1556
|
VisualAIProviderError,
|
|
1392
1557
|
VisualAIRateLimitError,
|
|
1393
1558
|
VisualAIResponseParseError,
|
|
1559
|
+
VisualAITruncationError,
|
|
1394
1560
|
assertVisualCompareResult,
|
|
1395
1561
|
assertVisualResult,
|
|
1396
1562
|
formatCheckResult,
|