visual-ai-assertions 0.6.0 → 0.7.1
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +20 -18
- package/dist/index.cjs +152 -42
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +53 -6
- package/dist/index.d.ts +53 -6
- package/dist/index.js +150 -42
- package/dist/index.js.map +1 -1
- package/package.json +3 -2
package/dist/index.js
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
1
|
// src/constants.ts
|
|
2
|
+
var ReasoningEffort = {
|
|
3
|
+
LOW: "low",
|
|
4
|
+
MEDIUM: "medium",
|
|
5
|
+
HIGH: "high",
|
|
6
|
+
XHIGH: "xhigh"
|
|
7
|
+
};
|
|
2
8
|
var Provider = {
|
|
3
9
|
ANTHROPIC: "anthropic",
|
|
4
10
|
OPENAI: "openai",
|
|
@@ -20,6 +26,7 @@ var Model = {
|
|
|
20
26
|
},
|
|
21
27
|
Google: {
|
|
22
28
|
GEMINI_3_1_PRO_PREVIEW: "gemini-3.1-pro-preview",
|
|
29
|
+
GEMINI_3_1_FLASH_LITE_PREVIEW: "gemini-3.1-flash-lite-preview",
|
|
23
30
|
GEMINI_3_FLASH_PREVIEW: "gemini-3-flash-preview"
|
|
24
31
|
}
|
|
25
32
|
};
|
|
@@ -29,6 +36,7 @@ var DEFAULT_MODELS = {
|
|
|
29
36
|
[Provider.GOOGLE]: Model.Google.GEMINI_3_FLASH_PREVIEW
|
|
30
37
|
};
|
|
31
38
|
var DEFAULT_MAX_TOKENS = 4096;
|
|
39
|
+
var OPENAI_REASONING_MAX_TOKENS = 16384;
|
|
32
40
|
var MODEL_TO_PROVIDER = new Map([
|
|
33
41
|
...Object.values(Model.Anthropic).map((m) => [m, Provider.ANTHROPIC]),
|
|
34
42
|
...Object.values(Model.OpenAI).map((m) => [m, Provider.OPENAI]),
|
|
@@ -113,6 +121,16 @@ var VisualAIResponseParseError = class extends VisualAIError {
|
|
|
113
121
|
this.rawResponse = rawResponse;
|
|
114
122
|
}
|
|
115
123
|
};
|
|
124
|
+
var VisualAITruncationError = class extends VisualAIError {
|
|
125
|
+
partialResponse;
|
|
126
|
+
maxTokens;
|
|
127
|
+
constructor(message, partialResponse, maxTokens) {
|
|
128
|
+
super(message, "RESPONSE_TRUNCATED");
|
|
129
|
+
this.name = "VisualAITruncationError";
|
|
130
|
+
this.partialResponse = partialResponse;
|
|
131
|
+
this.maxTokens = maxTokens;
|
|
132
|
+
}
|
|
133
|
+
};
|
|
116
134
|
var VisualAIConfigError = class extends VisualAIError {
|
|
117
135
|
constructor(message) {
|
|
118
136
|
super(message, "CONFIG_INVALID");
|
|
@@ -128,7 +146,7 @@ var VisualAIAssertionError = class extends VisualAIError {
|
|
|
128
146
|
}
|
|
129
147
|
};
|
|
130
148
|
function isVisualAIKnownError(error) {
|
|
131
|
-
return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
|
|
149
|
+
return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAITruncationError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
|
|
132
150
|
}
|
|
133
151
|
|
|
134
152
|
// src/core/prompt.ts
|
|
@@ -142,12 +160,18 @@ Each issue must have:
|
|
|
142
160
|
- "description": what the issue is
|
|
143
161
|
- "suggestion": how to fix or improve it
|
|
144
162
|
`;
|
|
145
|
-
var CHECK_OUTPUT_SCHEMA = `
|
|
163
|
+
var CHECK_OUTPUT_SCHEMA = `IMPORTANT: Follow this evaluation order:
|
|
164
|
+
1. First, evaluate EACH statement independently and populate the "statements" array
|
|
165
|
+
2. Then, set "pass" to true ONLY if every statement passed (logical AND of all statement results)
|
|
166
|
+
3. Write "reasoning" as a brief overall summary of the evaluation
|
|
167
|
+
4. Include "issues" only for statements that failed
|
|
168
|
+
|
|
169
|
+
Respond with a JSON object matching this exact structure:
|
|
146
170
|
{
|
|
147
|
-
"pass": boolean, // true ONLY if ALL statements
|
|
148
|
-
"reasoning": string, // brief overall summary
|
|
149
|
-
"issues": [...], //
|
|
150
|
-
"statements": [ // one entry per statement, in order
|
|
171
|
+
"pass": boolean, // true ONLY if ALL statements passed \u2014 derive from statements array
|
|
172
|
+
"reasoning": string, // brief overall summary of the evaluation
|
|
173
|
+
"issues": [...], // one issue per failing statement (empty if all pass)
|
|
174
|
+
"statements": [ // one entry per statement, in order \u2014 evaluate these FIRST
|
|
151
175
|
{
|
|
152
176
|
"statement": string, // the original statement text
|
|
153
177
|
"pass": boolean, // whether this statement is true
|
|
@@ -166,7 +190,7 @@ Only include issues for statements that fail. If all statements pass, issues sho
|
|
|
166
190
|
Example for a failing check:
|
|
167
191
|
{
|
|
168
192
|
"pass": false,
|
|
169
|
-
"reasoning": "
|
|
193
|
+
"reasoning": "The submit button is not visible on the page.",
|
|
170
194
|
"issues": [
|
|
171
195
|
{ "priority": "major", "category": "missing-element", "description": "Submit button is not visible on the page", "suggestion": "Verify the submit button component is rendered and not hidden by CSS" }
|
|
172
196
|
],
|
|
@@ -426,7 +450,7 @@ var AnthropicDriver = class {
|
|
|
426
450
|
this.client = new Anthropic({ apiKey });
|
|
427
451
|
return this.client;
|
|
428
452
|
}
|
|
429
|
-
async sendMessage(images, prompt) {
|
|
453
|
+
async sendMessage(images, prompt, _options) {
|
|
430
454
|
const client = await this.getClient();
|
|
431
455
|
const imageBlocks = images.map((img) => ({
|
|
432
456
|
type: "image",
|
|
@@ -456,6 +480,13 @@ var AnthropicDriver = class {
|
|
|
456
480
|
const message = await client.messages.create(requestParams);
|
|
457
481
|
const textBlock = message.content.find((block) => block.type === "text");
|
|
458
482
|
const text = textBlock?.text ?? "";
|
|
483
|
+
if (message.stop_reason === "max_tokens") {
|
|
484
|
+
throw new VisualAITruncationError(
|
|
485
|
+
`Response truncated: Anthropic stopped due to max_tokens limit (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
|
|
486
|
+
text,
|
|
487
|
+
this.maxTokens
|
|
488
|
+
);
|
|
489
|
+
}
|
|
459
490
|
return {
|
|
460
491
|
text,
|
|
461
492
|
usage: {
|
|
@@ -464,6 +495,7 @@ var AnthropicDriver = class {
|
|
|
464
495
|
}
|
|
465
496
|
};
|
|
466
497
|
} catch (err) {
|
|
498
|
+
if (err instanceof VisualAITruncationError) throw err;
|
|
467
499
|
throw mapProviderError(err);
|
|
468
500
|
}
|
|
469
501
|
}
|
|
@@ -475,11 +507,11 @@ function needsCodeExecution(model) {
|
|
|
475
507
|
const match = model.match(/^gemini-(\d+)/);
|
|
476
508
|
return match !== null && match[1] !== void 0 && parseInt(match[1], 10) >= 3;
|
|
477
509
|
}
|
|
478
|
-
var
|
|
479
|
-
low:
|
|
480
|
-
medium:
|
|
481
|
-
high:
|
|
482
|
-
xhigh:
|
|
510
|
+
var GOOGLE_THINKING_LEVEL = {
|
|
511
|
+
low: "minimal",
|
|
512
|
+
medium: "low",
|
|
513
|
+
high: "medium",
|
|
514
|
+
xhigh: "high"
|
|
483
515
|
};
|
|
484
516
|
var GoogleDriver = class {
|
|
485
517
|
client;
|
|
@@ -519,7 +551,7 @@ var GoogleDriver = class {
|
|
|
519
551
|
this.client = new GoogleGenAI({ apiKey });
|
|
520
552
|
return this.client;
|
|
521
553
|
}
|
|
522
|
-
async sendMessage(images, prompt) {
|
|
554
|
+
async sendMessage(images, prompt, _options) {
|
|
523
555
|
const client = await this.getClient();
|
|
524
556
|
try {
|
|
525
557
|
const response = await client.models.generateContent({
|
|
@@ -530,20 +562,36 @@ var GoogleDriver = class {
|
|
|
530
562
|
maxOutputTokens: this.maxTokens,
|
|
531
563
|
...this.reasoningEffort && {
|
|
532
564
|
thinkingConfig: {
|
|
533
|
-
|
|
565
|
+
thinkingLevel: GOOGLE_THINKING_LEVEL[this.reasoningEffort]
|
|
534
566
|
}
|
|
535
567
|
}
|
|
536
568
|
}
|
|
537
569
|
});
|
|
570
|
+
const finishReason = response.candidates?.[0]?.finishReason;
|
|
571
|
+
if (finishReason === "MAX_TOKENS") {
|
|
572
|
+
throw new VisualAITruncationError(
|
|
573
|
+
`Response truncated: Google returned finishReason "MAX_TOKENS". The model exhausted the output token budget (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
|
|
574
|
+
response.text ?? "",
|
|
575
|
+
this.maxTokens
|
|
576
|
+
);
|
|
577
|
+
}
|
|
578
|
+
if (finishReason && finishReason !== "STOP") {
|
|
579
|
+
throw new VisualAIProviderError(
|
|
580
|
+
`Response blocked: Google returned finishReason "${finishReason}".`
|
|
581
|
+
);
|
|
582
|
+
}
|
|
538
583
|
const text = response.text ?? "";
|
|
584
|
+
const thoughtsTokenCount = response.usageMetadata?.thoughtsTokenCount;
|
|
539
585
|
return {
|
|
540
586
|
text,
|
|
541
587
|
usage: response.usageMetadata ? {
|
|
542
588
|
inputTokens: response.usageMetadata.promptTokenCount ?? 0,
|
|
543
|
-
outputTokens: response.usageMetadata.candidatesTokenCount ?? 0
|
|
589
|
+
outputTokens: response.usageMetadata.candidatesTokenCount ?? 0,
|
|
590
|
+
...thoughtsTokenCount !== void 0 && { reasoningTokens: thoughtsTokenCount }
|
|
544
591
|
} : void 0
|
|
545
592
|
};
|
|
546
593
|
} catch (err) {
|
|
594
|
+
if (err instanceof VisualAITruncationError || err instanceof VisualAIProviderError) throw err;
|
|
547
595
|
throw mapProviderError(err);
|
|
548
596
|
}
|
|
549
597
|
}
|
|
@@ -615,17 +663,25 @@ var OpenAIDriver = class {
|
|
|
615
663
|
this.client = new OpenAI({ apiKey });
|
|
616
664
|
return this.client;
|
|
617
665
|
}
|
|
618
|
-
async sendMessage(images, prompt) {
|
|
666
|
+
async sendMessage(images, prompt, options) {
|
|
619
667
|
const client = await this.getClient();
|
|
620
668
|
const imageBlocks = images.map((img) => ({
|
|
621
669
|
type: "input_image",
|
|
622
670
|
image_url: `data:${img.mimeType};base64,${img.base64}`
|
|
623
671
|
}));
|
|
624
672
|
try {
|
|
673
|
+
const format = options?.responseSchema ? {
|
|
674
|
+
type: "json_schema",
|
|
675
|
+
json_schema: {
|
|
676
|
+
name: "visual_ai_response",
|
|
677
|
+
strict: true,
|
|
678
|
+
schema: options.responseSchema
|
|
679
|
+
}
|
|
680
|
+
} : { type: "json_object", name: "visual_ai_response" };
|
|
625
681
|
const requestParams = {
|
|
626
682
|
model: this.model,
|
|
627
683
|
max_output_tokens: this.maxTokens,
|
|
628
|
-
text: { format
|
|
684
|
+
text: { format },
|
|
629
685
|
input: [
|
|
630
686
|
{
|
|
631
687
|
role: "user",
|
|
@@ -637,15 +693,26 @@ var OpenAIDriver = class {
|
|
|
637
693
|
requestParams.reasoning = { effort: this.reasoningEffort };
|
|
638
694
|
}
|
|
639
695
|
const response = await client.responses.create(requestParams);
|
|
696
|
+
if (response.status && response.status !== "completed") {
|
|
697
|
+
const detail = response.incomplete_details?.reason ? ` (${response.incomplete_details.reason})` : "";
|
|
698
|
+
throw new VisualAITruncationError(
|
|
699
|
+
`Response truncated: OpenAI returned status "${response.status}"${detail}. The model exhausted the output token budget (${this.maxTokens} tokens). This commonly happens with higher reasoning effort levels. Increase maxTokens in your config (e.g., maxTokens: 16384) or lower reasoningEffort.`,
|
|
700
|
+
response.output_text ?? "",
|
|
701
|
+
this.maxTokens
|
|
702
|
+
);
|
|
703
|
+
}
|
|
640
704
|
const text = response.output_text ?? "";
|
|
705
|
+
const reasoningTokens = response.usage?.output_tokens_details?.reasoning_tokens;
|
|
641
706
|
return {
|
|
642
707
|
text,
|
|
643
708
|
usage: response.usage ? {
|
|
644
709
|
inputTokens: response.usage.input_tokens,
|
|
645
|
-
outputTokens: response.usage.output_tokens
|
|
710
|
+
outputTokens: response.usage.output_tokens,
|
|
711
|
+
...reasoningTokens !== void 0 && { reasoningTokens }
|
|
646
712
|
} : void 0
|
|
647
713
|
};
|
|
648
714
|
} catch (err) {
|
|
715
|
+
if (err instanceof VisualAITruncationError) throw err;
|
|
649
716
|
throw mapProviderError(err);
|
|
650
717
|
}
|
|
651
718
|
}
|
|
@@ -692,15 +759,6 @@ function parseBooleanEnv(envName, value) {
|
|
|
692
759
|
`Invalid ${envName} value: "${value}". Use "true", "1", "false", or "0".`
|
|
693
760
|
);
|
|
694
761
|
}
|
|
695
|
-
var VALID_REASONING_EFFORTS = ["low", "medium", "high", "xhigh"];
|
|
696
|
-
function parseReasoningEffortEnv(envName, value) {
|
|
697
|
-
if (value === void 0 || value === "") return void 0;
|
|
698
|
-
const lower = value.toLowerCase();
|
|
699
|
-
if (VALID_REASONING_EFFORTS.includes(lower)) return lower;
|
|
700
|
-
throw new VisualAIConfigError(
|
|
701
|
-
`Invalid ${envName} value: "${value}". Use "low", "medium", "high", or "xhigh".`
|
|
702
|
-
);
|
|
703
|
-
}
|
|
704
762
|
var debugDeprecationWarned = false;
|
|
705
763
|
function resolveConfig(config) {
|
|
706
764
|
const provider = resolveProvider(config);
|
|
@@ -715,12 +773,23 @@ function resolveConfig(config) {
|
|
|
715
773
|
`
|
|
716
774
|
);
|
|
717
775
|
}
|
|
776
|
+
const userSetMaxTokens = config.maxTokens !== void 0;
|
|
777
|
+
let maxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
|
|
778
|
+
if (!userSetMaxTokens && provider === "openai" && (config.reasoningEffort === "high" || config.reasoningEffort === "xhigh")) {
|
|
779
|
+
maxTokens = OPENAI_REASONING_MAX_TOKENS;
|
|
780
|
+
if (debug) {
|
|
781
|
+
process.stderr.write(
|
|
782
|
+
`[visual-ai-assertions] Auto-increased maxTokens from ${DEFAULT_MAX_TOKENS} to ${OPENAI_REASONING_MAX_TOKENS} for OpenAI with reasoningEffort "${config.reasoningEffort}".
|
|
783
|
+
`
|
|
784
|
+
);
|
|
785
|
+
}
|
|
786
|
+
}
|
|
718
787
|
return {
|
|
719
788
|
provider,
|
|
720
789
|
apiKey: config.apiKey,
|
|
721
790
|
model,
|
|
722
|
-
maxTokens
|
|
723
|
-
reasoningEffort: config.reasoningEffort
|
|
791
|
+
maxTokens,
|
|
792
|
+
reasoningEffort: config.reasoningEffort,
|
|
724
793
|
debug,
|
|
725
794
|
debugPrompt,
|
|
726
795
|
debugResponse,
|
|
@@ -771,6 +840,10 @@ var PRICING_TABLE = {
|
|
|
771
840
|
inputPricePerToken: 2 / PER_MILLION,
|
|
772
841
|
outputPricePerToken: 12 / PER_MILLION
|
|
773
842
|
},
|
|
843
|
+
[`${Provider.GOOGLE}:${Model.Google.GEMINI_3_1_FLASH_LITE_PREVIEW}`]: {
|
|
844
|
+
inputPricePerToken: 0.25 / PER_MILLION,
|
|
845
|
+
outputPricePerToken: 1.5 / PER_MILLION
|
|
846
|
+
},
|
|
774
847
|
[`${Provider.GOOGLE}:${Model.Google.GEMINI_3_FLASH_PREVIEW}`]: {
|
|
775
848
|
inputPricePerToken: 0.5 / PER_MILLION,
|
|
776
849
|
outputPricePerToken: 3 / PER_MILLION
|
|
@@ -795,8 +868,9 @@ function usageLog(config, method, usage) {
|
|
|
795
868
|
if (!config.trackUsage) return;
|
|
796
869
|
const costStr = usage.estimatedCost !== void 0 ? `$${usage.estimatedCost.toFixed(6)}` : "unknown";
|
|
797
870
|
const reasoningStr = config.reasoningEffort ? `reasoning: ${config.reasoningEffort}` : `reasoning: ${PROVIDER_DEFAULT_REASONING[config.provider]} (provider default)`;
|
|
871
|
+
const reasoningTokenStr = usage.reasoningTokens !== void 0 ? ` (${usage.reasoningTokens} reasoning)` : "";
|
|
798
872
|
process.stderr.write(
|
|
799
|
-
`[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
|
|
873
|
+
`[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output${reasoningTokenStr} tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
|
|
800
874
|
`
|
|
801
875
|
);
|
|
802
876
|
}
|
|
@@ -806,6 +880,7 @@ function processUsage(method, rawUsage, durationSeconds, config) {
|
|
|
806
880
|
const usage = {
|
|
807
881
|
inputTokens,
|
|
808
882
|
outputTokens,
|
|
883
|
+
...rawUsage?.reasoningTokens !== void 0 && { reasoningTokens: rawUsage.reasoningTokens },
|
|
809
884
|
estimatedCost: calculateCost(config.provider, config.model, inputTokens, outputTokens),
|
|
810
885
|
durationSeconds
|
|
811
886
|
};
|
|
@@ -814,6 +889,10 @@ function processUsage(method, rawUsage, durationSeconds, config) {
|
|
|
814
889
|
}
|
|
815
890
|
var MAX_RAW_RESPONSE_PREVIEW = 500;
|
|
816
891
|
function formatError(error) {
|
|
892
|
+
if (error instanceof VisualAITruncationError) {
|
|
893
|
+
const preview = error.partialResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.partialResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.partialResponse;
|
|
894
|
+
return `${error.name} (${error.code}): ${error.message}. Partial response: ${preview}`;
|
|
895
|
+
}
|
|
817
896
|
if (error instanceof VisualAIResponseParseError) {
|
|
818
897
|
const truncated = error.rawResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.rawResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.rawResponse;
|
|
819
898
|
return `${error.name} (${error.code}): ${error.message}. Raw (truncated): ${truncated}`;
|
|
@@ -834,9 +913,9 @@ async function withErrorDebug(config, method, fn) {
|
|
|
834
913
|
throw error;
|
|
835
914
|
}
|
|
836
915
|
}
|
|
837
|
-
async function timedSendMessage(driver, images, prompt) {
|
|
916
|
+
async function timedSendMessage(driver, images, prompt, options) {
|
|
838
917
|
const start = performance.now();
|
|
839
|
-
const response = await driver.sendMessage(images, prompt);
|
|
918
|
+
const response = await driver.sendMessage(images, prompt, options);
|
|
840
919
|
const durationSeconds = (performance.now() - start) / 1e3;
|
|
841
920
|
return { ...response, durationSeconds };
|
|
842
921
|
}
|
|
@@ -1076,6 +1155,8 @@ var StatementResultSchema = z.object({
|
|
|
1076
1155
|
var UsageInfoSchema = z.object({
|
|
1077
1156
|
inputTokens: z.number(),
|
|
1078
1157
|
outputTokens: z.number(),
|
|
1158
|
+
/** Reasoning/thinking tokens consumed by the model (informational, typically included within outputTokens). */
|
|
1159
|
+
reasoningTokens: z.number().optional(),
|
|
1079
1160
|
estimatedCost: z.number().optional(),
|
|
1080
1161
|
durationSeconds: z.number().nonnegative().optional()
|
|
1081
1162
|
});
|
|
@@ -1129,8 +1210,24 @@ function parseResponse(raw, schema) {
|
|
|
1129
1210
|
}
|
|
1130
1211
|
return result.data;
|
|
1131
1212
|
}
|
|
1213
|
+
function reconcileCheckResult(result) {
|
|
1214
|
+
if (result.statements.length === 0) {
|
|
1215
|
+
return result;
|
|
1216
|
+
}
|
|
1217
|
+
const passCount = result.statements.filter((s) => s.pass).length;
|
|
1218
|
+
const total = result.statements.length;
|
|
1219
|
+
const computedPass = passCount === total;
|
|
1220
|
+
const countPrefix = `${passCount} of ${total} checks passed`;
|
|
1221
|
+
const reasoning = `${countPrefix}. ${result.reasoning}`;
|
|
1222
|
+
return {
|
|
1223
|
+
...result,
|
|
1224
|
+
pass: computedPass,
|
|
1225
|
+
reasoning
|
|
1226
|
+
};
|
|
1227
|
+
}
|
|
1132
1228
|
function parseCheckResponse(raw) {
|
|
1133
|
-
|
|
1229
|
+
const result = parseResponse(raw, CheckResponseSchema);
|
|
1230
|
+
return reconcileCheckResult(result);
|
|
1134
1231
|
}
|
|
1135
1232
|
function parseAskResponse(raw) {
|
|
1136
1233
|
return parseResponse(raw, AskResponseSchema);
|
|
@@ -1140,6 +1237,12 @@ function parseCompareResponse(raw) {
|
|
|
1140
1237
|
}
|
|
1141
1238
|
|
|
1142
1239
|
// src/core/client.ts
|
|
1240
|
+
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
1241
|
+
function toSchemaOptions(schema) {
|
|
1242
|
+
return {
|
|
1243
|
+
responseSchema: zodToJsonSchema(schema, { target: "openAi" })
|
|
1244
|
+
};
|
|
1245
|
+
}
|
|
1143
1246
|
var PROVIDER_REGISTRY = {
|
|
1144
1247
|
anthropic: (config) => new AnthropicDriver(config),
|
|
1145
1248
|
openai: (config) => new OpenAIDriver(config),
|
|
@@ -1148,6 +1251,9 @@ var PROVIDER_REGISTRY = {
|
|
|
1148
1251
|
function createDriver(provider, config) {
|
|
1149
1252
|
return PROVIDER_REGISTRY[provider](config);
|
|
1150
1253
|
}
|
|
1254
|
+
var checkSchemaOptions = toSchemaOptions(CheckResponseSchema);
|
|
1255
|
+
var askSchemaOptions = toSchemaOptions(AskResponseSchema);
|
|
1256
|
+
var compareSchemaOptions = toSchemaOptions(CompareResponseSchema);
|
|
1151
1257
|
function visualAI(config = {}) {
|
|
1152
1258
|
const resolvedConfig = resolveConfig(config);
|
|
1153
1259
|
const driverConfig = {
|
|
@@ -1166,7 +1272,7 @@ function visualAI(config = {}) {
|
|
|
1166
1272
|
const img = await normalizeImage(image);
|
|
1167
1273
|
const prompt = buildElementsVisibilityPrompt(elements, visible, options);
|
|
1168
1274
|
debugLog(resolvedConfig, `${methodName} prompt`, prompt, "prompt");
|
|
1169
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1275
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1170
1276
|
debugLog(resolvedConfig, `${methodName} response`, response.text, "response");
|
|
1171
1277
|
const result = parseCheckResponse(response.text);
|
|
1172
1278
|
return {
|
|
@@ -1185,7 +1291,7 @@ function visualAI(config = {}) {
|
|
|
1185
1291
|
const img = await normalizeImage(image);
|
|
1186
1292
|
const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
|
|
1187
1293
|
debugLog(resolvedConfig, "check prompt", prompt, "prompt");
|
|
1188
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1294
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1189
1295
|
debugLog(resolvedConfig, "check response", response.text, "response");
|
|
1190
1296
|
const result = parseCheckResponse(response.text);
|
|
1191
1297
|
return {
|
|
@@ -1199,7 +1305,7 @@ function visualAI(config = {}) {
|
|
|
1199
1305
|
const img = await normalizeImage(image);
|
|
1200
1306
|
const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
|
|
1201
1307
|
debugLog(resolvedConfig, "ask prompt", prompt, "prompt");
|
|
1202
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1308
|
+
const response = await timedSendMessage(driver, [img], prompt, askSchemaOptions);
|
|
1203
1309
|
debugLog(resolvedConfig, "ask response", response.text, "response");
|
|
1204
1310
|
const result = parseAskResponse(response.text);
|
|
1205
1311
|
return {
|
|
@@ -1216,7 +1322,7 @@ function visualAI(config = {}) {
|
|
|
1216
1322
|
instructions: options?.instructions
|
|
1217
1323
|
});
|
|
1218
1324
|
debugLog(resolvedConfig, "compare prompt", prompt, "prompt");
|
|
1219
|
-
const response = await timedSendMessage(driver, [imgA, imgB], prompt);
|
|
1325
|
+
const response = await timedSendMessage(driver, [imgA, imgB], prompt, compareSchemaOptions);
|
|
1220
1326
|
debugLog(resolvedConfig, "compare response", response.text, "response");
|
|
1221
1327
|
const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
|
|
1222
1328
|
const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
|
|
@@ -1251,7 +1357,7 @@ function visualAI(config = {}) {
|
|
|
1251
1357
|
const img = await normalizeImage(image);
|
|
1252
1358
|
const prompt = buildAccessibilityPrompt(options);
|
|
1253
1359
|
debugLog(resolvedConfig, "accessibility prompt", prompt, "prompt");
|
|
1254
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1360
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1255
1361
|
debugLog(resolvedConfig, "accessibility response", response.text, "response");
|
|
1256
1362
|
const result = parseCheckResponse(response.text);
|
|
1257
1363
|
return {
|
|
@@ -1270,7 +1376,7 @@ function visualAI(config = {}) {
|
|
|
1270
1376
|
const img = await normalizeImage(image);
|
|
1271
1377
|
const prompt = buildLayoutPrompt(options);
|
|
1272
1378
|
debugLog(resolvedConfig, "layout prompt", prompt, "prompt");
|
|
1273
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1379
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1274
1380
|
debugLog(resolvedConfig, "layout response", response.text, "response");
|
|
1275
1381
|
const result = parseCheckResponse(response.text);
|
|
1276
1382
|
return {
|
|
@@ -1284,7 +1390,7 @@ function visualAI(config = {}) {
|
|
|
1284
1390
|
const img = await normalizeImage(image);
|
|
1285
1391
|
const prompt = buildPageLoadPrompt(options);
|
|
1286
1392
|
debugLog(resolvedConfig, "pageLoad prompt", prompt, "prompt");
|
|
1287
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1393
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1288
1394
|
debugLog(resolvedConfig, "pageLoad response", response.text, "response");
|
|
1289
1395
|
const result = parseCheckResponse(response.text);
|
|
1290
1396
|
return {
|
|
@@ -1298,7 +1404,7 @@ function visualAI(config = {}) {
|
|
|
1298
1404
|
const img = await normalizeImage(image);
|
|
1299
1405
|
const prompt = buildContentPrompt(options);
|
|
1300
1406
|
debugLog(resolvedConfig, "content prompt", prompt, "prompt");
|
|
1301
|
-
const response = await timedSendMessage(driver, [img], prompt);
|
|
1407
|
+
const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
|
|
1302
1408
|
debugLog(resolvedConfig, "content response", response.text, "response");
|
|
1303
1409
|
const result = parseCheckResponse(response.text);
|
|
1304
1410
|
return {
|
|
@@ -1379,6 +1485,7 @@ export {
|
|
|
1379
1485
|
Layout,
|
|
1380
1486
|
Model,
|
|
1381
1487
|
Provider,
|
|
1488
|
+
ReasoningEffort,
|
|
1382
1489
|
StatementResultSchema,
|
|
1383
1490
|
UsageInfoSchema,
|
|
1384
1491
|
VisualAIAssertionError,
|
|
@@ -1389,6 +1496,7 @@ export {
|
|
|
1389
1496
|
VisualAIProviderError,
|
|
1390
1497
|
VisualAIRateLimitError,
|
|
1391
1498
|
VisualAIResponseParseError,
|
|
1499
|
+
VisualAITruncationError,
|
|
1392
1500
|
assertVisualCompareResult,
|
|
1393
1501
|
assertVisualResult,
|
|
1394
1502
|
formatCheckResult,
|