visual-ai-assertions 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -16
- package/dist/index.cjs +293 -127
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +48 -3
- package/dist/index.d.ts +48 -3
- package/dist/index.js +292 -127
- package/dist/index.js.map +1 -1
- package/package.json +3 -2
package/dist/index.js
CHANGED
|
@@ -20,6 +20,7 @@ var Model = {
|
|
|
20
20
|
},
|
|
21
21
|
Google: {
|
|
22
22
|
GEMINI_3_1_PRO_PREVIEW: "gemini-3.1-pro-preview",
|
|
23
|
+
GEMINI_3_1_FLASH_LITE_PREVIEW: "gemini-3.1-flash-lite-preview",
|
|
23
24
|
GEMINI_3_FLASH_PREVIEW: "gemini-3-flash-preview"
|
|
24
25
|
}
|
|
25
26
|
};
|
|
@@ -29,12 +30,18 @@ var DEFAULT_MODELS = {
|
|
|
29
30
|
[Provider.GOOGLE]: Model.Google.GEMINI_3_FLASH_PREVIEW
|
|
30
31
|
};
|
|
31
32
|
/** Output-token budget used when the caller does not supply `maxTokens`. */
var DEFAULT_MAX_TOKENS = 4096;
/**
 * Larger output-token budget. resolveConfig substitutes it for the default when
 * the provider is OpenAI, reasoningEffort is "high" or "xhigh", and the caller
 * did not set `maxTokens` explicitly.
 */
var OPENAI_REASONING_MAX_TOKENS = 16384;
/** Lookup from every model id listed under `Model` to the provider that owns it. */
var MODEL_TO_PROVIDER = new Map([
  ...Object.values(Model.Anthropic).map((m) => [m, Provider.ANTHROPIC]),
  ...Object.values(Model.OpenAI).map((m) => [m, Provider.OPENAI]),
  ...Object.values(Model.Google).map((m) => [m, Provider.GOOGLE])
]);
/** All provider ids declared on `Provider`. */
var VALID_PROVIDERS = Object.values(Provider);
/**
 * Reasoning level printed by usageLog (tagged "provider default") when no
 * reasoningEffort is configured. Within this file it is only used as a label.
 */
var PROVIDER_DEFAULT_REASONING = {
  openai: "medium",
  anthropic: "off",
  google: "off"
};
|
|
38
45
|
var Content = {
|
|
39
46
|
/** Detects Lorem ipsum, TODO, TBD, and similar placeholder text */
|
|
40
47
|
PLACEHOLDER_TEXT: "placeholder-text",
|
|
@@ -108,6 +115,16 @@ var VisualAIResponseParseError = class extends VisualAIError {
|
|
|
108
115
|
this.rawResponse = rawResponse;
|
|
109
116
|
}
|
|
110
117
|
};
|
|
118
|
+
/**
 * Error thrown by the provider drivers when the model stopped before finishing
 * its answer (for example Anthropic `stop_reason === "max_tokens"` or Google
 * `finishReason === "MAX_TOKENS"`). Carries the error code "RESPONSE_TRUNCATED".
 */
var VisualAITruncationError = class extends VisualAIError {
  /** Text the provider had produced before it stopped. */
  partialResponse;
  /** The maxTokens value that was in force for the request. */
  maxTokens;
  /**
   * @param message Human-readable explanation.
   * @param partialResponse Text received before the cut-off.
   * @param maxTokens Output-token limit used for the request.
   */
  constructor(message, partialResponse, maxTokens) {
    super(message, "RESPONSE_TRUNCATED");
    this.name = "VisualAITruncationError";
    this.partialResponse = partialResponse;
    this.maxTokens = maxTokens;
  }
};
|
|
111
128
|
var VisualAIConfigError = class extends VisualAIError {
|
|
112
129
|
constructor(message) {
|
|
113
130
|
super(message, "CONFIG_INVALID");
|
|
@@ -123,7 +140,7 @@ var VisualAIAssertionError = class extends VisualAIError {
|
|
|
123
140
|
}
|
|
124
141
|
};
|
|
125
142
|
/**
 * Reports whether `error` is an instance of one of the library's own error classes.
 *
 * @param {unknown} error Value to classify.
 * @returns {boolean} true for any of the listed VisualAI error types, false otherwise.
 */
function isVisualAIKnownError(error) {
  const knownErrorTypes = [
    VisualAIAuthError,
    VisualAIRateLimitError,
    VisualAIProviderError,
    VisualAIImageError,
    VisualAIResponseParseError,
    VisualAITruncationError,
    VisualAIConfigError,
    VisualAIAssertionError
  ];
  return knownErrorTypes.some((ErrorType) => error instanceof ErrorType);
}
|
|
128
145
|
|
|
129
146
|
// src/core/prompt.ts
|
|
@@ -137,12 +154,18 @@ Each issue must have:
|
|
|
137
154
|
- "description": what the issue is
|
|
138
155
|
- "suggestion": how to fix or improve it
|
|
139
156
|
`;
|
|
140
|
-
var CHECK_OUTPUT_SCHEMA = `
|
|
157
|
+
var CHECK_OUTPUT_SCHEMA = `IMPORTANT: Follow this evaluation order:
|
|
158
|
+
1. First, evaluate EACH statement independently and populate the "statements" array
|
|
159
|
+
2. Then, set "pass" to true ONLY if every statement passed (logical AND of all statement results)
|
|
160
|
+
3. Write "reasoning" as a brief overall summary of the evaluation
|
|
161
|
+
4. Include "issues" only for statements that failed
|
|
162
|
+
|
|
163
|
+
Respond with a JSON object matching this exact structure:
|
|
141
164
|
{
|
|
142
|
-
"pass": boolean, // true ONLY if ALL statements
|
|
143
|
-
"reasoning": string, // brief overall summary
|
|
144
|
-
"issues": [...], //
|
|
145
|
-
"statements": [ // one entry per statement, in order
|
|
165
|
+
"pass": boolean, // true ONLY if ALL statements passed \u2014 derive from statements array
|
|
166
|
+
"reasoning": string, // brief overall summary of the evaluation
|
|
167
|
+
"issues": [...], // one issue per failing statement (empty if all pass)
|
|
168
|
+
"statements": [ // one entry per statement, in order \u2014 evaluate these FIRST
|
|
146
169
|
{
|
|
147
170
|
"statement": string, // the original statement text
|
|
148
171
|
"pass": boolean, // whether this statement is true
|
|
@@ -161,7 +184,7 @@ Only include issues for statements that fail. If all statements pass, issues sho
|
|
|
161
184
|
Example for a failing check:
|
|
162
185
|
{
|
|
163
186
|
"pass": false,
|
|
164
|
-
"reasoning": "
|
|
187
|
+
"reasoning": "The submit button is not visible on the page.",
|
|
165
188
|
"issues": [
|
|
166
189
|
{ "priority": "major", "category": "missing-element", "description": "Submit button is not visible on the page", "suggestion": "Verify the submit button component is rendered and not hidden by CSS" }
|
|
167
190
|
],
|
|
@@ -421,7 +444,7 @@ var AnthropicDriver = class {
|
|
|
421
444
|
this.client = new Anthropic({ apiKey });
|
|
422
445
|
return this.client;
|
|
423
446
|
}
|
|
424
|
-
async sendMessage(images, prompt) {
|
|
447
|
+
async sendMessage(images, prompt, _options) {
|
|
425
448
|
const client = await this.getClient();
|
|
426
449
|
const imageBlocks = images.map((img) => ({
|
|
427
450
|
type: "image",
|
|
@@ -451,6 +474,13 @@ var AnthropicDriver = class {
|
|
|
451
474
|
const message = await client.messages.create(requestParams);
|
|
452
475
|
const textBlock = message.content.find((block) => block.type === "text");
|
|
453
476
|
const text = textBlock?.text ?? "";
|
|
477
|
+
if (message.stop_reason === "max_tokens") {
|
|
478
|
+
throw new VisualAITruncationError(
|
|
479
|
+
`Response truncated: Anthropic stopped due to max_tokens limit (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
|
|
480
|
+
text,
|
|
481
|
+
this.maxTokens
|
|
482
|
+
);
|
|
483
|
+
}
|
|
454
484
|
return {
|
|
455
485
|
text,
|
|
456
486
|
usage: {
|
|
@@ -459,6 +489,7 @@ var AnthropicDriver = class {
|
|
|
459
489
|
}
|
|
460
490
|
};
|
|
461
491
|
} catch (err) {
|
|
492
|
+
if (err instanceof VisualAITruncationError) throw err;
|
|
462
493
|
throw mapProviderError(err);
|
|
463
494
|
}
|
|
464
495
|
}
|
|
@@ -470,11 +501,11 @@ function needsCodeExecution(model) {
|
|
|
470
501
|
const match = model.match(/^gemini-(\d+)/);
|
|
471
502
|
return match !== null && match[1] !== void 0 && parseInt(match[1], 10) >= 3;
|
|
472
503
|
}
|
|
473
|
-
var
|
|
474
|
-
low:
|
|
475
|
-
medium:
|
|
476
|
-
high:
|
|
477
|
-
xhigh:
|
|
504
|
+
/**
 * Translates the library's reasoningEffort values into the `thinkingLevel`
 * that GoogleDriver places in `thinkingConfig`. Each effort maps to the
 * next-lower Google level name (e.g. "medium" -> "low", "xhigh" -> "high").
 */
var GOOGLE_THINKING_LEVEL = {
  low: "minimal",
  medium: "low",
  high: "medium",
  xhigh: "high"
};
|
|
479
510
|
var GoogleDriver = class {
|
|
480
511
|
client;
|
|
@@ -514,7 +545,7 @@ var GoogleDriver = class {
|
|
|
514
545
|
this.client = new GoogleGenAI({ apiKey });
|
|
515
546
|
return this.client;
|
|
516
547
|
}
|
|
517
|
-
async sendMessage(images, prompt) {
|
|
548
|
+
async sendMessage(images, prompt, _options) {
|
|
518
549
|
const client = await this.getClient();
|
|
519
550
|
try {
|
|
520
551
|
const response = await client.models.generateContent({
|
|
@@ -525,20 +556,36 @@ var GoogleDriver = class {
|
|
|
525
556
|
maxOutputTokens: this.maxTokens,
|
|
526
557
|
...this.reasoningEffort && {
|
|
527
558
|
thinkingConfig: {
|
|
528
|
-
|
|
559
|
+
thinkingLevel: GOOGLE_THINKING_LEVEL[this.reasoningEffort]
|
|
529
560
|
}
|
|
530
561
|
}
|
|
531
562
|
}
|
|
532
563
|
});
|
|
564
|
+
const finishReason = response.candidates?.[0]?.finishReason;
|
|
565
|
+
if (finishReason === "MAX_TOKENS") {
|
|
566
|
+
throw new VisualAITruncationError(
|
|
567
|
+
`Response truncated: Google returned finishReason "MAX_TOKENS". The model exhausted the output token budget (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
|
|
568
|
+
response.text ?? "",
|
|
569
|
+
this.maxTokens
|
|
570
|
+
);
|
|
571
|
+
}
|
|
572
|
+
if (finishReason && finishReason !== "STOP") {
|
|
573
|
+
throw new VisualAIProviderError(
|
|
574
|
+
`Response blocked: Google returned finishReason "${finishReason}".`
|
|
575
|
+
);
|
|
576
|
+
}
|
|
533
577
|
const text = response.text ?? "";
|
|
578
|
+
const thoughtsTokenCount = response.usageMetadata?.thoughtsTokenCount;
|
|
534
579
|
return {
|
|
535
580
|
text,
|
|
536
581
|
usage: response.usageMetadata ? {
|
|
537
582
|
inputTokens: response.usageMetadata.promptTokenCount ?? 0,
|
|
538
|
-
outputTokens: response.usageMetadata.candidatesTokenCount ?? 0
|
|
583
|
+
outputTokens: response.usageMetadata.candidatesTokenCount ?? 0,
|
|
584
|
+
...thoughtsTokenCount !== void 0 && { reasoningTokens: thoughtsTokenCount }
|
|
539
585
|
} : void 0
|
|
540
586
|
};
|
|
541
587
|
} catch (err) {
|
|
588
|
+
if (err instanceof VisualAITruncationError || err instanceof VisualAIProviderError) throw err;
|
|
542
589
|
throw mapProviderError(err);
|
|
543
590
|
}
|
|
544
591
|
}
|
|
@@ -610,17 +657,25 @@ var OpenAIDriver = class {
|
|
|
610
657
|
this.client = new OpenAI({ apiKey });
|
|
611
658
|
return this.client;
|
|
612
659
|
}
|
|
613
|
-
async sendMessage(images, prompt) {
|
|
660
|
+
async sendMessage(images, prompt, options) {
|
|
614
661
|
const client = await this.getClient();
|
|
615
662
|
const imageBlocks = images.map((img) => ({
|
|
616
663
|
type: "input_image",
|
|
617
664
|
image_url: `data:${img.mimeType};base64,${img.base64}`
|
|
618
665
|
}));
|
|
619
666
|
try {
|
|
667
|
+
// Structured-output settings for `text.format` on the Responses API request.
// The Responses API expects `name`, `strict` and `schema` directly beside
// `type`; wrapping them in a `json_schema` object is the Chat Completions
// `response_format` shape and is rejected here. Without a caller-supplied
// schema, fall back to plain JSON-object mode.
const format = options?.responseSchema ? {
  type: "json_schema",
  name: "visual_ai_response",
  strict: true,
  schema: options.responseSchema
} : { type: "json_object" };
|
|
620
675
|
const requestParams = {
|
|
621
676
|
model: this.model,
|
|
622
677
|
max_output_tokens: this.maxTokens,
|
|
623
|
-
text: { format
|
|
678
|
+
text: { format },
|
|
624
679
|
input: [
|
|
625
680
|
{
|
|
626
681
|
role: "user",
|
|
@@ -632,15 +687,26 @@ var OpenAIDriver = class {
|
|
|
632
687
|
requestParams.reasoning = { effort: this.reasoningEffort };
|
|
633
688
|
}
|
|
634
689
|
const response = await client.responses.create(requestParams);
|
|
690
|
+
if (response.status && response.status !== "completed") {
|
|
691
|
+
const detail = response.incomplete_details?.reason ? ` (${response.incomplete_details.reason})` : "";
|
|
692
|
+
throw new VisualAITruncationError(
|
|
693
|
+
`Response truncated: OpenAI returned status "${response.status}"${detail}. The model exhausted the output token budget (${this.maxTokens} tokens). This commonly happens with higher reasoning effort levels. Increase maxTokens in your config (e.g., maxTokens: 16384) or lower reasoningEffort.`,
|
|
694
|
+
response.output_text ?? "",
|
|
695
|
+
this.maxTokens
|
|
696
|
+
);
|
|
697
|
+
}
|
|
635
698
|
const text = response.output_text ?? "";
|
|
699
|
+
const reasoningTokens = response.usage?.output_tokens_details?.reasoning_tokens;
|
|
636
700
|
return {
|
|
637
701
|
text,
|
|
638
702
|
usage: response.usage ? {
|
|
639
703
|
inputTokens: response.usage.input_tokens,
|
|
640
|
-
outputTokens: response.usage.output_tokens
|
|
704
|
+
outputTokens: response.usage.output_tokens,
|
|
705
|
+
...reasoningTokens !== void 0 && { reasoningTokens }
|
|
641
706
|
} : void 0
|
|
642
707
|
};
|
|
643
708
|
} catch (err) {
|
|
709
|
+
if (err instanceof VisualAITruncationError) throw err;
|
|
644
710
|
throw mapProviderError(err);
|
|
645
711
|
}
|
|
646
712
|
}
|
|
@@ -687,16 +753,40 @@ function parseBooleanEnv(envName, value) {
|
|
|
687
753
|
`Invalid ${envName} value: "${value}". Use "true", "1", "false", or "0".`
|
|
688
754
|
);
|
|
689
755
|
}
|
|
756
|
+
// Latch so the VISUAL_AI_DEBUG deprecation notice below is written only once
// per module load, no matter how many times resolveConfig runs.
var debugDeprecationWarned = false;
/**
 * Builds the effective configuration from caller options, environment
 * variables and built-in defaults.
 *
 * For each debug flag the explicit config value wins, then the matching
 * environment variable, then `false`.
 *
 * @param config Caller-supplied options (provider, model, apiKey, maxTokens,
 *   reasoningEffort, debug, debugPrompt, debugResponse, trackUsage).
 * @returns The resolved configuration object consumed by the drivers and loggers.
 */
function resolveConfig(config) {
  const provider = resolveProvider(config);
  const model = config.model ?? process.env.VISUAL_AI_MODEL ?? DEFAULT_MODELS[provider];
  const debug = config.debug ?? parseBooleanEnv("VISUAL_AI_DEBUG", process.env.VISUAL_AI_DEBUG) ?? false;
  const debugPrompt = config.debugPrompt ?? parseBooleanEnv("VISUAL_AI_DEBUG_PROMPT", process.env.VISUAL_AI_DEBUG_PROMPT) ?? false;
  const debugResponse = config.debugResponse ?? parseBooleanEnv("VISUAL_AI_DEBUG_RESPONSE", process.env.VISUAL_AI_DEBUG_RESPONSE) ?? false;
  // `debug` on its own no longer turns on prompt/response output, so point the
  // user at the dedicated flags the first time this combination is seen.
  if (debug && !debugPrompt && !debugResponse && !debugDeprecationWarned) {
    debugDeprecationWarned = true;
    process.stderr.write(
      `[visual-ai-assertions] Warning: VISUAL_AI_DEBUG no longer enables prompt/response logging. Use VISUAL_AI_DEBUG_PROMPT=true and/or VISUAL_AI_DEBUG_RESPONSE=true instead.
`
    );
  }
  const userSetMaxTokens = config.maxTokens !== void 0;
  let maxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
  // Only when the caller left maxTokens unset: OpenAI at "high"/"xhigh"
  // reasoning effort gets the larger budget instead of the default.
  if (!userSetMaxTokens && provider === "openai" && (config.reasoningEffort === "high" || config.reasoningEffort === "xhigh")) {
    maxTokens = OPENAI_REASONING_MAX_TOKENS;
    if (debug) {
      process.stderr.write(
        `[visual-ai-assertions] Auto-increased maxTokens from ${DEFAULT_MAX_TOKENS} to ${OPENAI_REASONING_MAX_TOKENS} for OpenAI with reasoningEffort "${config.reasoningEffort}".
`
      );
    }
  }
  return {
    provider,
    apiKey: config.apiKey,
    model,
    maxTokens,
    reasoningEffort: config.reasoningEffort,
    debug,
    debugPrompt,
    debugResponse,
    trackUsage: config.trackUsage ?? parseBooleanEnv("VISUAL_AI_TRACK_USAGE", process.env.VISUAL_AI_TRACK_USAGE) ?? false
  };
}
|
|
@@ -744,6 +834,10 @@ var PRICING_TABLE = {
|
|
|
744
834
|
inputPricePerToken: 2 / PER_MILLION,
|
|
745
835
|
outputPricePerToken: 12 / PER_MILLION
|
|
746
836
|
},
|
|
837
|
+
[`${Provider.GOOGLE}:${Model.Google.GEMINI_3_1_FLASH_LITE_PREVIEW}`]: {
|
|
838
|
+
inputPricePerToken: 0.25 / PER_MILLION,
|
|
839
|
+
outputPricePerToken: 1.5 / PER_MILLION
|
|
840
|
+
},
|
|
747
841
|
[`${Provider.GOOGLE}:${Model.Google.GEMINI_3_FLASH_PREVIEW}`]: {
|
|
748
842
|
inputPricePerToken: 0.5 / PER_MILLION,
|
|
749
843
|
outputPricePerToken: 3 / PER_MILLION
|
|
@@ -757,8 +851,9 @@ function calculateCost(provider, model, inputTokens, outputTokens) {
|
|
|
757
851
|
}
|
|
758
852
|
|
|
759
853
|
// src/core/debug.ts
|
|
760
|
-
function debugLog(config, label, data) {
|
|
761
|
-
|
|
854
|
+
function debugLog(config, label, data, kind = "error") {
|
|
855
|
+
const enabled = kind === "prompt" ? config.debugPrompt : kind === "response" ? config.debugResponse : config.debug;
|
|
856
|
+
if (enabled) {
|
|
762
857
|
process.stderr.write(`[visual-ai-assertions] ${label}: ${data}
|
|
763
858
|
`);
|
|
764
859
|
}
|
|
@@ -766,8 +861,10 @@ function debugLog(config, label, data) {
|
|
|
766
861
|
/**
 * Writes a one-line token/cost/duration summary for a call to stderr.
 * Does nothing unless `config.trackUsage` is truthy.
 *
 * @param config Resolved configuration (reads trackUsage, reasoningEffort, provider, model).
 * @param method Name of the API method being reported.
 * @param usage Usage record (inputTokens, outputTokens, optional reasoningTokens,
 *   estimatedCost and durationSeconds).
 */
function usageLog(config, method, usage) {
  if (!config.trackUsage) {
    return;
  }
  const { inputTokens, outputTokens, reasoningTokens, estimatedCost, durationSeconds } = usage;

  let costLabel = "unknown";
  if (estimatedCost !== undefined) {
    costLabel = `$${estimatedCost.toFixed(6)}`;
  }

  // Without a configured effort, report the provider's default level and say so.
  let reasoningLabel;
  if (config.reasoningEffort) {
    reasoningLabel = `reasoning: ${config.reasoningEffort}`;
  } else {
    reasoningLabel = `reasoning: ${PROVIDER_DEFAULT_REASONING[config.provider]} (provider default)`;
  }

  let reasoningTokenLabel = "";
  if (reasoningTokens !== undefined) {
    reasoningTokenLabel = ` (${reasoningTokens} reasoning)`;
  }

  const elapsedLabel = durationSeconds?.toFixed(3) ?? "0.000";
  const line = `[visual-ai-assertions] ${method} usage: ${inputTokens} input + ${outputTokens} output${reasoningTokenLabel} tokens (${costLabel}) in ${elapsedLabel}s [${config.model}, ${reasoningLabel}]\n`;
  process.stderr.write(line);
}
|
|
@@ -777,15 +874,42 @@ function processUsage(method, rawUsage, durationSeconds, config) {
|
|
|
777
874
|
const usage = {
|
|
778
875
|
inputTokens,
|
|
779
876
|
outputTokens,
|
|
877
|
+
...rawUsage?.reasoningTokens !== void 0 && { reasoningTokens: rawUsage.reasoningTokens },
|
|
780
878
|
estimatedCost: calculateCost(config.provider, config.model, inputTokens, outputTokens),
|
|
781
879
|
durationSeconds
|
|
782
880
|
};
|
|
783
881
|
usageLog(config, method, usage);
|
|
784
882
|
return usage;
|
|
785
883
|
}
|
|
786
|
-
|
|
884
|
+
/** Maximum number of characters of a raw/partial model response echoed into a debug line. */
var MAX_RAW_RESPONSE_PREVIEW = 500;
/**
 * Returns at most MAX_RAW_RESPONSE_PREVIEW characters of `text`, followed by
 * "..." when something was cut. A missing value is treated as an empty string
 * so that formatting an error can never throw.
 */
function clipResponsePreview(text) {
  const safeText = text == null ? "" : String(text);
  return safeText.length > MAX_RAW_RESPONSE_PREVIEW ? `${safeText.slice(0, MAX_RAW_RESPONSE_PREVIEW)}...` : safeText;
}
/** Drops one trailing period so a ". <detail>" suffix does not produce "..". */
function dropTrailingPeriod(message) {
  return String(message).replace(/\.\s*$/, "");
}
/**
 * Renders any thrown value as a single-line description for the debug log.
 *
 * - Truncation errors include a clipped preview of the partial response.
 * - Parse errors include a clipped preview of the raw response.
 * - Other VisualAI errors show name, code and message.
 * - Plain Errors show name and message; anything else is stringified.
 *
 * @param {unknown} error The caught value.
 * @returns {string} Description of the error.
 */
function formatError(error) {
  if (error instanceof VisualAITruncationError) {
    const preview = clipResponsePreview(error.partialResponse);
    return `${error.name} (${error.code}): ${dropTrailingPeriod(error.message)}. Partial response: ${preview}`;
  }
  if (error instanceof VisualAIResponseParseError) {
    const preview = clipResponsePreview(error.rawResponse);
    return `${error.name} (${error.code}): ${dropTrailingPeriod(error.message)}. Raw (truncated): ${preview}`;
  }
  if (error instanceof VisualAIError) {
    return `${error.name} (${error.code}): ${error.message}`;
  }
  if (error instanceof Error) {
    return `${error.name}: ${error.message}`;
  }
  return String(error);
}
|
|
902
|
+
/**
 * Runs `fn` and, if it rejects, writes a formatted description of the error to
 * the debug log before rethrowing that same error.
 *
 * @param config Resolved configuration passed through to debugLog.
 * @param {string} method Name of the API method, used as the log label prefix.
 * @param {() => Promise<unknown>} fn Operation to execute.
 * @returns Whatever `fn` resolves to.
 * @throws The original error raised by `fn`, unchanged.
 */
async function withErrorDebug(config, method, fn) {
  try {
    return await fn();
  } catch (error) {
    try {
      debugLog(config, `${method} error`, formatError(error), "error");
    } catch {
      // Diagnostics are best-effort: a failure while formatting or writing the
      // debug line must never replace the error the caller needs to see.
    }
    throw error;
  }
}
|
|
910
|
+
/**
 * Forwards a request to `driver.sendMessage` and measures how long it takes.
 *
 * @param driver Object exposing `sendMessage(images, prompt, options)`.
 * @param images Images to send.
 * @param prompt Prompt text.
 * @param options Extra per-request options handed to the driver unchanged.
 * @returns The driver's response with `durationSeconds` (wall-clock seconds,
 *   from `performance.now()`) added, overriding any such field on the response.
 */
async function timedSendMessage(driver, images, prompt, options) {
  const startedAtMs = performance.now();
  const reply = await driver.sendMessage(images, prompt, options);
  const elapsedMs = performance.now() - startedAtMs;
  return { ...reply, durationSeconds: elapsedMs / 1000 };
}
|
|
@@ -1025,6 +1149,8 @@ var StatementResultSchema = z.object({
|
|
|
1025
1149
|
var UsageInfoSchema = z.object({
|
|
1026
1150
|
inputTokens: z.number(),
|
|
1027
1151
|
outputTokens: z.number(),
|
|
1152
|
+
/** Reasoning/thinking tokens consumed by the model (informational, typically included within outputTokens). */
|
|
1153
|
+
reasoningTokens: z.number().optional(),
|
|
1028
1154
|
estimatedCost: z.number().optional(),
|
|
1029
1155
|
durationSeconds: z.number().nonnegative().optional()
|
|
1030
1156
|
});
|
|
@@ -1078,8 +1204,24 @@ function parseResponse(raw, schema) {
|
|
|
1078
1204
|
}
|
|
1079
1205
|
return result.data;
|
|
1080
1206
|
}
|
|
1207
|
+
/**
 * Makes the top-level verdict of a check result agree with its per-statement
 * results.
 *
 * When there is at least one statement, `pass` is recomputed as "every
 * statement passed" and `reasoning` is prefixed with "<passed> of <total>
 * checks passed. ". A result with no statements is returned untouched.
 *
 * @param result Parsed check response (reads `statements` and `reasoning`).
 * @returns The same object when `statements` is empty, otherwise a shallow copy
 *   with `pass` and `reasoning` replaced.
 */
function reconcileCheckResult(result) {
  const { statements } = result;
  const totalCount = statements.length;
  if (totalCount === 0) {
    return result;
  }
  const passedCount = statements.reduce((count, statement) => statement.pass ? count + 1 : count, 0);
  return {
    ...result,
    pass: passedCount === totalCount,
    reasoning: `${passedCount} of ${totalCount} checks passed. ${result.reasoning}`
  };
}
|
|
1081
1222
|
/**
 * Parses a raw model reply against CheckResponseSchema and then reconciles the
 * overall verdict with the per-statement results.
 *
 * @param {string} raw Raw response text from the provider.
 * @returns The reconciled check result.
 */
function parseCheckResponse(raw) {
  return reconcileCheckResult(parseResponse(raw, CheckResponseSchema));
}
|
|
1084
1226
|
function parseAskResponse(raw) {
|
|
1085
1227
|
return parseResponse(raw, AskResponseSchema);
|
|
@@ -1089,6 +1231,12 @@ function parseCompareResponse(raw) {
|
|
|
1089
1231
|
}
|
|
1090
1232
|
|
|
1091
1233
|
// src/core/client.ts
|
|
1234
|
+
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
1235
|
+
/**
 * Converts a Zod schema into the per-request options object understood by the
 * drivers, using zod-to-json-schema's "openAi" target.
 *
 * @param schema Zod schema describing the expected response.
 * @returns `{ responseSchema }` holding the generated JSON schema.
 */
function toSchemaOptions(schema) {
  const responseSchema = zodToJsonSchema(schema, { target: "openAi" });
  return { responseSchema };
}
|
|
1092
1240
|
var PROVIDER_REGISTRY = {
|
|
1093
1241
|
anthropic: (config) => new AnthropicDriver(config),
|
|
1094
1242
|
openai: (config) => new OpenAIDriver(config),
|
|
@@ -1097,6 +1245,9 @@ var PROVIDER_REGISTRY = {
|
|
|
1097
1245
|
/**
 * Instantiates the driver for `provider` by calling its factory in
 * PROVIDER_REGISTRY with `config`.
 *
 * @param provider Provider id used as the registry key.
 * @param config Driver configuration handed to the factory.
 * @returns The driver instance produced by the factory.
 */
function createDriver(provider, config) {
  return PROVIDER_REGISTRY[provider](config);
}
|
|
1248
|
+
// Per-response-shape request options, computed once at module load and passed
// to timedSendMessage by the client methods (check-style calls, ask, compare).
var checkSchemaOptions = toSchemaOptions(CheckResponseSchema);
var askSchemaOptions = toSchemaOptions(AskResponseSchema);
var compareSchemaOptions = toSchemaOptions(CompareResponseSchema);
|
|
1100
1251
|
function visualAI(config = {}) {
|
|
1101
1252
|
const resolvedConfig = resolveConfig(config);
|
|
1102
1253
|
const driverConfig = {
|
|
@@ -1111,16 +1262,18 @@ function visualAI(config = {}) {
|
|
|
1111
1262
|
if (elements.length === 0) {
|
|
1112
1263
|
throw new VisualAIConfigError(`At least one element is required for ${methodName}()`);
|
|
1113
1264
|
}
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1265
|
+
return withErrorDebug(resolvedConfig, methodName, async () => {
|
|
1266
|
+
const img = await normalizeImage(image);
|
|
1267
|
+
const prompt = buildElementsVisibilityPrompt(elements, visible, options);
|
|
1268
|
+
debugLog(resolvedConfig, `${methodName} prompt`, prompt, "prompt");
|
|
1269
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1270
|
+
debugLog(resolvedConfig, `${methodName} response`, response.text, "response");
|
|
1271
|
+
const result = parseCheckResponse(response.text);
|
|
1272
|
+
return {
|
|
1273
|
+
...result,
|
|
1274
|
+
usage: processUsage(methodName, response.usage, response.durationSeconds, resolvedConfig)
|
|
1275
|
+
};
|
|
1276
|
+
});
|
|
1124
1277
|
}
|
|
1125
1278
|
return {
|
|
1126
1279
|
async check(image, statements, options) {
|
|
@@ -1128,61 +1281,64 @@ function visualAI(config = {}) {
|
|
|
1128
1281
|
if (stmts.length === 0) {
|
|
1129
1282
|
throw new VisualAIConfigError("At least one statement is required for check()");
|
|
1130
1283
|
}
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1284
|
+
return withErrorDebug(resolvedConfig, "check", async () => {
|
|
1285
|
+
const img = await normalizeImage(image);
|
|
1286
|
+
const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
|
|
1287
|
+
debugLog(resolvedConfig, "check prompt", prompt, "prompt");
|
|
1288
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1289
|
+
debugLog(resolvedConfig, "check response", response.text, "response");
|
|
1290
|
+
const result = parseCheckResponse(response.text);
|
|
1291
|
+
return {
|
|
1292
|
+
...result,
|
|
1293
|
+
usage: processUsage("check", response.usage, response.durationSeconds, resolvedConfig)
|
|
1294
|
+
};
|
|
1295
|
+
});
|
|
1141
1296
|
},
|
|
1142
1297
|
async ask(image, userPrompt, options) {
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1298
|
+
return withErrorDebug(resolvedConfig, "ask", async () => {
|
|
1299
|
+
const img = await normalizeImage(image);
|
|
1300
|
+
const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
|
|
1301
|
+
debugLog(resolvedConfig, "ask prompt", prompt, "prompt");
|
|
1302
|
+
const response = await timedSendMessage(driver, [img], prompt, askSchemaOptions);
|
|
1303
|
+
debugLog(resolvedConfig, "ask response", response.text, "response");
|
|
1304
|
+
const result = parseAskResponse(response.text);
|
|
1305
|
+
return {
|
|
1306
|
+
...result,
|
|
1307
|
+
usage: processUsage("ask", response.usage, response.durationSeconds, resolvedConfig)
|
|
1308
|
+
};
|
|
1309
|
+
});
|
|
1153
1310
|
},
|
|
1154
1311
|
async compare(imageA, imageB, options) {
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
if (!resolvedConfig.debug) {
|
|
1312
|
+
return withErrorDebug(resolvedConfig, "compare", async () => {
|
|
1313
|
+
const [imgA, imgB] = await Promise.all([normalizeImage(imageA), normalizeImage(imageB)]);
|
|
1314
|
+
const prompt = buildComparePrompt({
|
|
1315
|
+
userPrompt: options?.prompt,
|
|
1316
|
+
instructions: options?.instructions
|
|
1317
|
+
});
|
|
1318
|
+
debugLog(resolvedConfig, "compare prompt", prompt, "prompt");
|
|
1319
|
+
const response = await timedSendMessage(driver, [imgA, imgB], prompt, compareSchemaOptions);
|
|
1320
|
+
debugLog(resolvedConfig, "compare response", response.text, "response");
|
|
1321
|
+
const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
|
|
1322
|
+
const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
|
|
1323
|
+
let diffImage;
|
|
1324
|
+
if (effectiveDiffImage) {
|
|
1325
|
+
try {
|
|
1326
|
+
diffImage = await generateAiDiff(imgA, imgB, resolvedConfig.model, driver);
|
|
1327
|
+
} catch (err) {
|
|
1328
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
1173
1329
|
process.stderr.write(
|
|
1174
1330
|
`[visual-ai-assertions] warning: diff generation failed: ${msg}
|
|
1175
1331
|
`
|
|
1176
1332
|
);
|
|
1177
1333
|
}
|
|
1178
1334
|
}
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
};
|
|
1335
|
+
const result = parseCompareResponse(response.text);
|
|
1336
|
+
return {
|
|
1337
|
+
...result,
|
|
1338
|
+
...diffImage ? { diffImage } : {},
|
|
1339
|
+
usage: processUsage("compare", response.usage, response.durationSeconds, resolvedConfig)
|
|
1340
|
+
};
|
|
1341
|
+
});
|
|
1186
1342
|
},
|
|
1187
1343
|
elementsVisible(image, elements, options) {
|
|
1188
1344
|
return checkElementsVisibility(image, elements, true, options);
|
|
@@ -1191,57 +1347,65 @@ function visualAI(config = {}) {
|
|
|
1191
1347
|
return checkElementsVisibility(image, elements, false, options);
|
|
1192
1348
|
},
|
|
1193
1349
|
async accessibility(image, options) {
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1350
|
+
return withErrorDebug(resolvedConfig, "accessibility", async () => {
|
|
1351
|
+
const img = await normalizeImage(image);
|
|
1352
|
+
const prompt = buildAccessibilityPrompt(options);
|
|
1353
|
+
debugLog(resolvedConfig, "accessibility prompt", prompt, "prompt");
|
|
1354
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1355
|
+
debugLog(resolvedConfig, "accessibility response", response.text, "response");
|
|
1356
|
+
const result = parseCheckResponse(response.text);
|
|
1357
|
+
return {
|
|
1358
|
+
...result,
|
|
1359
|
+
usage: processUsage(
|
|
1360
|
+
"accessibility",
|
|
1361
|
+
response.usage,
|
|
1362
|
+
response.durationSeconds,
|
|
1363
|
+
resolvedConfig
|
|
1364
|
+
)
|
|
1365
|
+
};
|
|
1366
|
+
});
|
|
1209
1367
|
},
|
|
1210
1368
|
async layout(image, options) {
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1369
|
+
return withErrorDebug(resolvedConfig, "layout", async () => {
|
|
1370
|
+
const img = await normalizeImage(image);
|
|
1371
|
+
const prompt = buildLayoutPrompt(options);
|
|
1372
|
+
debugLog(resolvedConfig, "layout prompt", prompt, "prompt");
|
|
1373
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1374
|
+
debugLog(resolvedConfig, "layout response", response.text, "response");
|
|
1375
|
+
const result = parseCheckResponse(response.text);
|
|
1376
|
+
return {
|
|
1377
|
+
...result,
|
|
1378
|
+
usage: processUsage("layout", response.usage, response.durationSeconds, resolvedConfig)
|
|
1379
|
+
};
|
|
1380
|
+
});
|
|
1221
1381
|
},
|
|
1222
1382
|
async pageLoad(image, options) {
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1383
|
+
return withErrorDebug(resolvedConfig, "pageLoad", async () => {
|
|
1384
|
+
const img = await normalizeImage(image);
|
|
1385
|
+
const prompt = buildPageLoadPrompt(options);
|
|
1386
|
+
debugLog(resolvedConfig, "pageLoad prompt", prompt, "prompt");
|
|
1387
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1388
|
+
debugLog(resolvedConfig, "pageLoad response", response.text, "response");
|
|
1389
|
+
const result = parseCheckResponse(response.text);
|
|
1390
|
+
return {
|
|
1391
|
+
...result,
|
|
1392
|
+
usage: processUsage("pageLoad", response.usage, response.durationSeconds, resolvedConfig)
|
|
1393
|
+
};
|
|
1394
|
+
});
|
|
1233
1395
|
},
|
|
1234
1396
|
async content(image, options) {
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1397
|
+
return withErrorDebug(resolvedConfig, "content", async () => {
|
|
1398
|
+
const img = await normalizeImage(image);
|
|
1399
|
+
const prompt = buildContentPrompt(options);
|
|
1400
|
+
debugLog(resolvedConfig, "content prompt", prompt, "prompt");
|
|
1401
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1402
|
+
debugLog(resolvedConfig, "content response", response.text, "response");
|
|
1403
|
+
const result = parseCheckResponse(response.text);
|
|
1404
|
+
return {
|
|
1405
|
+
...result,
|
|
1406
|
+
usage: processUsage("content", response.usage, response.durationSeconds, resolvedConfig)
|
|
1407
|
+
};
|
|
1408
|
+
});
|
|
1245
1409
|
}
|
|
1246
1410
|
};
|
|
1247
1411
|
}
|
|
@@ -1325,6 +1489,7 @@ export {
|
|
|
1325
1489
|
VisualAIProviderError,
|
|
1326
1490
|
VisualAIRateLimitError,
|
|
1327
1491
|
VisualAIResponseParseError,
|
|
1492
|
+
VisualAITruncationError,
|
|
1328
1493
|
assertVisualCompareResult,
|
|
1329
1494
|
assertVisualResult,
|
|
1330
1495
|
formatCheckResult,
|