visual-ai-assertions 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -18
- package/dist/index.cjs +144 -42
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +43 -3
- package/dist/index.d.ts +43 -3
- package/dist/index.js +143 -42
- package/dist/index.js.map +1 -1
- package/package.json +3 -2
package/dist/index.js (CHANGED)
@@ -20,6 +20,7 @@ var Model = {
   },
   Google: {
     GEMINI_3_1_PRO_PREVIEW: "gemini-3.1-pro-preview",
+    GEMINI_3_1_FLASH_LITE_PREVIEW: "gemini-3.1-flash-lite-preview",
     GEMINI_3_FLASH_PREVIEW: "gemini-3-flash-preview"
   }
 };
@@ -29,6 +30,7 @@ var DEFAULT_MODELS = {
   [Provider.GOOGLE]: Model.Google.GEMINI_3_FLASH_PREVIEW
 };
 var DEFAULT_MAX_TOKENS = 4096;
+var OPENAI_REASONING_MAX_TOKENS = 16384;
 var MODEL_TO_PROVIDER = new Map([
   ...Object.values(Model.Anthropic).map((m) => [m, Provider.ANTHROPIC]),
   ...Object.values(Model.OpenAI).map((m) => [m, Provider.OPENAI]),
@@ -113,6 +115,16 @@ var VisualAIResponseParseError = class extends VisualAIError {
     this.rawResponse = rawResponse;
   }
 };
+var VisualAITruncationError = class extends VisualAIError {
+  partialResponse;
+  maxTokens;
+  constructor(message, partialResponse, maxTokens) {
+    super(message, "RESPONSE_TRUNCATED");
+    this.name = "VisualAITruncationError";
+    this.partialResponse = partialResponse;
+    this.maxTokens = maxTokens;
+  }
+};
 var VisualAIConfigError = class extends VisualAIError {
   constructor(message) {
     super(message, "CONFIG_INVALID");
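The new VisualAITruncationError exposes the partial model output (partialResponse) and the token budget that was exhausted (maxTokens). A minimal sketch of how calling code might react to it; the retry helper and the doubled budget are illustrative, not part of the package API:

```ts
import { visualAI, VisualAITruncationError } from "visual-ai-assertions";

// Hypothetical one-shot retry: if the model ran out of output budget,
// rebuild the client with double the budget reported by the error.
async function checkWithRetry(screenshot: Buffer, statements: string[]) {
  try {
    return await visualAI().check(screenshot, statements);
  } catch (err) {
    if (err instanceof VisualAITruncationError) {
      console.warn(`Truncated at ${err.maxTokens} tokens; retrying with a larger budget.`);
      return visualAI({ maxTokens: err.maxTokens * 2 }).check(screenshot, statements);
    }
    throw err;
  }
}
```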
@@ -128,7 +140,7 @@ var VisualAIAssertionError = class extends VisualAIError {
   }
 };
 function isVisualAIKnownError(error) {
-  return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
+  return error instanceof VisualAIAuthError || error instanceof VisualAIRateLimitError || error instanceof VisualAIProviderError || error instanceof VisualAIImageError || error instanceof VisualAIResponseParseError || error instanceof VisualAITruncationError || error instanceof VisualAIConfigError || error instanceof VisualAIAssertionError;
 }

 // src/core/prompt.ts
@@ -142,12 +154,18 @@ Each issue must have:
 - "description": what the issue is
 - "suggestion": how to fix or improve it
 `;
-var CHECK_OUTPUT_SCHEMA = `
+var CHECK_OUTPUT_SCHEMA = `IMPORTANT: Follow this evaluation order:
+1. First, evaluate EACH statement independently and populate the "statements" array
+2. Then, set "pass" to true ONLY if every statement passed (logical AND of all statement results)
+3. Write "reasoning" as a brief overall summary of the evaluation
+4. Include "issues" only for statements that failed
+
+Respond with a JSON object matching this exact structure:
 {
-  "pass": boolean, // true ONLY if ALL statements
-  "reasoning": string, // brief overall summary
-  "issues": [...], //
-  "statements": [ // one entry per statement, in order
+  "pass": boolean, // true ONLY if ALL statements passed \u2014 derive from statements array
+  "reasoning": string, // brief overall summary of the evaluation
+  "issues": [...], // one issue per failing statement (empty if all pass)
+  "statements": [ // one entry per statement, in order \u2014 evaluate these FIRST
     {
       "statement": string, // the original statement text
       "pass": boolean, // whether this statement is true
@@ -166,7 +184,7 @@ Only include issues for statements that fail. If all statements pass, issues sho
 Example for a failing check:
 {
   "pass": false,
-  "reasoning": "
+  "reasoning": "The submit button is not visible on the page.",
   "issues": [
     { "priority": "major", "category": "missing-element", "description": "Submit button is not visible on the page", "suggestion": "Verify the submit button component is rendered and not hidden by CSS" }
   ],
@@ -426,7 +444,7 @@ var AnthropicDriver = class {
     this.client = new Anthropic({ apiKey });
     return this.client;
   }
-  async sendMessage(images, prompt) {
+  async sendMessage(images, prompt, _options) {
     const client = await this.getClient();
     const imageBlocks = images.map((img) => ({
       type: "image",
@@ -456,6 +474,13 @@ var AnthropicDriver = class {
       const message = await client.messages.create(requestParams);
       const textBlock = message.content.find((block) => block.type === "text");
       const text = textBlock?.text ?? "";
+      if (message.stop_reason === "max_tokens") {
+        throw new VisualAITruncationError(
+          `Response truncated: Anthropic stopped due to max_tokens limit (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
+          text,
+          this.maxTokens
+        );
+      }
       return {
         text,
         usage: {
@@ -464,6 +489,7 @@ var AnthropicDriver = class {
         }
       };
     } catch (err) {
+      if (err instanceof VisualAITruncationError) throw err;
       throw mapProviderError(err);
     }
   }
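All three drivers in this release follow the same two-step shape: detect the provider-specific "ran out of output tokens" signal, throw VisualAITruncationError with whatever text was produced, and rethrow that error ahead of the generic provider-error mapper so it is not re-wrapped. A condensed, self-contained sketch of the pattern; the helper names here are stand-ins, not package internals:

```ts
// Stand-in error type mirroring the class added above.
class TruncationError extends Error {
  constructor(message: string, public partialResponse: string, public maxTokens: number) {
    super(message);
  }
}

async function sendWithTruncationCheck(
  callProvider: () => Promise<{ text: string; hitTokenCeiling: boolean }>,
  maxTokens: number
): Promise<string> {
  try {
    const raw = await callProvider();
    // Provider-specific signals: Anthropic stop_reason === "max_tokens",
    // Google finishReason === "MAX_TOKENS", OpenAI status !== "completed".
    if (raw.hitTokenCeiling) {
      throw new TruncationError(`Truncated at ${maxTokens} tokens`, raw.text, maxTokens);
    }
    return raw.text;
  } catch (err) {
    // Rethrow our own error before the generic mapper would wrap it.
    if (err instanceof TruncationError) throw err;
    throw new Error(`Provider error: ${String(err)}`);
  }
}
```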
@@ -475,11 +501,11 @@ function needsCodeExecution(model) {
   const match = model.match(/^gemini-(\d+)/);
   return match !== null && match[1] !== void 0 && parseInt(match[1], 10) >= 3;
 }
-var
-  low:
-  medium:
-  high:
-  xhigh:
+var GOOGLE_THINKING_LEVEL = {
+  low: "minimal",
+  medium: "low",
+  high: "medium",
+  xhigh: "high"
 };
 var GoogleDriver = class {
   client;
@@ -519,7 +545,7 @@ var GoogleDriver = class {
     this.client = new GoogleGenAI({ apiKey });
     return this.client;
   }
-  async sendMessage(images, prompt) {
+  async sendMessage(images, prompt, _options) {
     const client = await this.getClient();
     try {
       const response = await client.models.generateContent({
@@ -530,20 +556,36 @@ var GoogleDriver = class {
           maxOutputTokens: this.maxTokens,
           ...this.reasoningEffort && {
             thinkingConfig: {
-
+              thinkingLevel: GOOGLE_THINKING_LEVEL[this.reasoningEffort]
             }
           }
         }
       });
+      const finishReason = response.candidates?.[0]?.finishReason;
+      if (finishReason === "MAX_TOKENS") {
+        throw new VisualAITruncationError(
+          `Response truncated: Google returned finishReason "MAX_TOKENS". The model exhausted the output token budget (${this.maxTokens} tokens). Increase maxTokens in your config or lower reasoningEffort.`,
+          response.text ?? "",
+          this.maxTokens
+        );
+      }
+      if (finishReason && finishReason !== "STOP") {
+        throw new VisualAIProviderError(
+          `Response blocked: Google returned finishReason "${finishReason}".`
+        );
+      }
       const text = response.text ?? "";
+      const thoughtsTokenCount = response.usageMetadata?.thoughtsTokenCount;
       return {
         text,
         usage: response.usageMetadata ? {
           inputTokens: response.usageMetadata.promptTokenCount ?? 0,
-          outputTokens: response.usageMetadata.candidatesTokenCount ?? 0
+          outputTokens: response.usageMetadata.candidatesTokenCount ?? 0,
+          ...thoughtsTokenCount !== void 0 && { reasoningTokens: thoughtsTokenCount }
         } : void 0
       };
     } catch (err) {
+      if (err instanceof VisualAITruncationError || err instanceof VisualAIProviderError) throw err;
       throw mapProviderError(err);
     }
   }
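Note that GOOGLE_THINKING_LEVEL shifts the user-facing effort scale down one notch ("xhigh" maps to Google's top "high" level), and that any finishReason other than STOP or MAX_TOKENS is now surfaced as a provider error instead of silently returning empty text. A small sketch of that branch against a stubbed response shape; the stub types are illustrative, not the @google/genai types:

```ts
// Illustrative stub of the fields the driver inspects.
interface StubResponse {
  candidates?: { finishReason?: string }[];
  text?: string;
}

function classify(response: StubResponse): "ok" | "truncated" | "blocked" {
  const finishReason = response.candidates?.[0]?.finishReason;
  if (finishReason === "MAX_TOKENS") return "truncated"; // -> VisualAITruncationError
  if (finishReason && finishReason !== "STOP") return "blocked"; // -> VisualAIProviderError
  return "ok";
}

console.log(classify({ candidates: [{ finishReason: "SAFETY" }] })); // "blocked"
```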
@@ -615,17 +657,25 @@ var OpenAIDriver = class {
     this.client = new OpenAI({ apiKey });
     return this.client;
   }
-  async sendMessage(images, prompt) {
+  async sendMessage(images, prompt, options) {
     const client = await this.getClient();
     const imageBlocks = images.map((img) => ({
       type: "input_image",
       image_url: `data:${img.mimeType};base64,${img.base64}`
     }));
     try {
+      const format = options?.responseSchema ? {
+        type: "json_schema",
+        json_schema: {
+          name: "visual_ai_response",
+          strict: true,
+          schema: options.responseSchema
+        }
+      } : { type: "json_object" };
       const requestParams = {
         model: this.model,
         max_output_tokens: this.maxTokens,
-        text: { format
+        text: { format },
         input: [
           {
             role: "user",
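The OpenAI driver now opts into strict structured outputs whenever the caller supplies a JSON schema, and only falls back to plain json_object mode when none is given. A standalone sketch of the selection logic shown above:

```ts
type TextFormat =
  | { type: "json_object" }
  | { type: "json_schema"; json_schema: { name: string; strict: true; schema: object } };

// `responseSchema` is a plain JSON Schema object, e.g. one produced by
// zodToJsonSchema later in this diff.
function selectFormat(responseSchema?: object): TextFormat {
  return responseSchema
    ? {
        type: "json_schema",
        json_schema: { name: "visual_ai_response", strict: true, schema: responseSchema }
      }
    : { type: "json_object" };
}
```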
@@ -637,15 +687,26 @@
         requestParams.reasoning = { effort: this.reasoningEffort };
       }
       const response = await client.responses.create(requestParams);
+      if (response.status && response.status !== "completed") {
+        const detail = response.incomplete_details?.reason ? ` (${response.incomplete_details.reason})` : "";
+        throw new VisualAITruncationError(
+          `Response truncated: OpenAI returned status "${response.status}"${detail}. The model exhausted the output token budget (${this.maxTokens} tokens). This commonly happens with higher reasoning effort levels. Increase maxTokens in your config (e.g., maxTokens: 16384) or lower reasoningEffort.`,
+          response.output_text ?? "",
+          this.maxTokens
+        );
+      }
       const text = response.output_text ?? "";
+      const reasoningTokens = response.usage?.output_tokens_details?.reasoning_tokens;
       return {
         text,
         usage: response.usage ? {
           inputTokens: response.usage.input_tokens,
-          outputTokens: response.usage.output_tokens
+          outputTokens: response.usage.output_tokens,
+          ...reasoningTokens !== void 0 && { reasoningTokens }
         } : void 0
       };
     } catch (err) {
+      if (err instanceof VisualAITruncationError) throw err;
       throw mapProviderError(err);
     }
   }
@@ -692,15 +753,6 @@ function parseBooleanEnv(envName, value) {
     `Invalid ${envName} value: "${value}". Use "true", "1", "false", or "0".`
   );
 }
-var VALID_REASONING_EFFORTS = ["low", "medium", "high", "xhigh"];
-function parseReasoningEffortEnv(envName, value) {
-  if (value === void 0 || value === "") return void 0;
-  const lower = value.toLowerCase();
-  if (VALID_REASONING_EFFORTS.includes(lower)) return lower;
-  throw new VisualAIConfigError(
-    `Invalid ${envName} value: "${value}". Use "low", "medium", "high", or "xhigh".`
-  );
-}
 var debugDeprecationWarned = false;
 function resolveConfig(config) {
   const provider = resolveProvider(config);
@@ -715,12 +767,23 @@ function resolveConfig(config) {
 `
     );
   }
+  const userSetMaxTokens = config.maxTokens !== void 0;
+  let maxTokens = config.maxTokens ?? DEFAULT_MAX_TOKENS;
+  if (!userSetMaxTokens && provider === "openai" && (config.reasoningEffort === "high" || config.reasoningEffort === "xhigh")) {
+    maxTokens = OPENAI_REASONING_MAX_TOKENS;
+    if (debug) {
+      process.stderr.write(
+        `[visual-ai-assertions] Auto-increased maxTokens from ${DEFAULT_MAX_TOKENS} to ${OPENAI_REASONING_MAX_TOKENS} for OpenAI with reasoningEffort "${config.reasoningEffort}".
+`
+      );
+    }
+  }
   return {
     provider,
     apiKey: config.apiKey,
     model,
-    maxTokens
-    reasoningEffort: config.reasoningEffort
+    maxTokens,
+    reasoningEffort: config.reasoningEffort,
     debug,
     debugPrompt,
     debugResponse,
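The effect of the new block: OpenAI configs that request "high" or "xhigh" reasoning get a 16384-token output budget unless the caller set maxTokens explicitly, in which case the caller's value always wins. Two illustrative configs, assuming the provider/reasoningEffort/maxTokens keys used by resolveConfig above:

```ts
import { visualAI } from "visual-ai-assertions";

// maxTokens omitted: resolves to 16384 (OPENAI_REASONING_MAX_TOKENS),
// because reasoningEffort is "high" on OpenAI.
const auto = visualAI({ provider: "openai", reasoningEffort: "high" });

// maxTokens pinned by the caller: the auto-increase is skipped, 8192 is kept.
const pinned = visualAI({ provider: "openai", reasoningEffort: "high", maxTokens: 8192 });
```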
@@ -771,6 +834,10 @@ var PRICING_TABLE = {
     inputPricePerToken: 2 / PER_MILLION,
     outputPricePerToken: 12 / PER_MILLION
   },
+  [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_1_FLASH_LITE_PREVIEW}`]: {
+    inputPricePerToken: 0.25 / PER_MILLION,
+    outputPricePerToken: 1.5 / PER_MILLION
+  },
   [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_FLASH_PREVIEW}`]: {
     inputPricePerToken: 0.5 / PER_MILLION,
     outputPricePerToken: 3 / PER_MILLION
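At the new gemini-3.1-flash-lite-preview rates ($0.25 per million input tokens, $1.50 per million output tokens), costs stay in fractions of a cent. A quick worked example using the same per-token arithmetic as PRICING_TABLE:

```ts
const PER_MILLION = 1e6;
const inputPricePerToken = 0.25 / PER_MILLION;
const outputPricePerToken = 1.5 / PER_MILLION;

// e.g. a call consuming 2000 input tokens and 500 output tokens:
const cost = 2000 * inputPricePerToken + 500 * outputPricePerToken;
console.log(cost.toFixed(6)); // "0.001250" -> $0.00125
```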
@@ -795,8 +862,9 @@ function usageLog(config, method, usage) {
   if (!config.trackUsage) return;
   const costStr = usage.estimatedCost !== void 0 ? `$${usage.estimatedCost.toFixed(6)}` : "unknown";
   const reasoningStr = config.reasoningEffort ? `reasoning: ${config.reasoningEffort}` : `reasoning: ${PROVIDER_DEFAULT_REASONING[config.provider]} (provider default)`;
+  const reasoningTokenStr = usage.reasoningTokens !== void 0 ? ` (${usage.reasoningTokens} reasoning)` : "";
   process.stderr.write(
-    `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
+    `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output${reasoningTokenStr} tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
 `
   );
 }
@@ -806,6 +874,7 @@ function processUsage(method, rawUsage, durationSeconds, config) {
   const usage = {
     inputTokens,
     outputTokens,
+    ...rawUsage?.reasoningTokens !== void 0 && { reasoningTokens: rawUsage.reasoningTokens },
     estimatedCost: calculateCost(config.provider, config.model, inputTokens, outputTokens),
     durationSeconds
   };
@@ -814,6 +883,10 @@ function processUsage(method, rawUsage, durationSeconds, config) {
 }
 var MAX_RAW_RESPONSE_PREVIEW = 500;
 function formatError(error) {
+  if (error instanceof VisualAITruncationError) {
+    const preview = error.partialResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.partialResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.partialResponse;
+    return `${error.name} (${error.code}): ${error.message}. Partial response: ${preview}`;
+  }
   if (error instanceof VisualAIResponseParseError) {
     const truncated = error.rawResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.rawResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.rawResponse;
     return `${error.name} (${error.code}): ${error.message}. Raw (truncated): ${truncated}`;
@@ -834,9 +907,9 @@ async function withErrorDebug(config, method, fn) {
     throw error;
   }
 }
-async function timedSendMessage(driver, images, prompt) {
+async function timedSendMessage(driver, images, prompt, options) {
   const start = performance.now();
-  const response = await driver.sendMessage(images, prompt);
+  const response = await driver.sendMessage(images, prompt, options);
   const durationSeconds = (performance.now() - start) / 1e3;
   return { ...response, durationSeconds };
 }
@@ -1076,6 +1149,8 @@ var StatementResultSchema = z.object({
 var UsageInfoSchema = z.object({
   inputTokens: z.number(),
   outputTokens: z.number(),
+  /** Reasoning/thinking tokens consumed by the model (informational, typically included within outputTokens). */
+  reasoningTokens: z.number().optional(),
   estimatedCost: z.number().optional(),
   durationSeconds: z.number().nonnegative().optional()
 });
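With reasoningTokens threaded through from both drivers (output_tokens_details.reasoning_tokens on OpenAI, thoughtsTokenCount on Google), a usage object validated by the updated schema can now look like this (values illustrative), and usageLog renders the middle field as "900 output (612 reasoning) tokens":

```ts
const usage = {
  inputTokens: 1450,
  outputTokens: 900,
  reasoningTokens: 612, // informational; typically already counted inside outputTokens
  estimatedCost: 0.00317,
  durationSeconds: 4.211
};
```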
@@ -1129,8 +1204,24 @@ function parseResponse(raw, schema) {
   }
   return result.data;
 }
+function reconcileCheckResult(result) {
+  if (result.statements.length === 0) {
+    return result;
+  }
+  const passCount = result.statements.filter((s) => s.pass).length;
+  const total = result.statements.length;
+  const computedPass = passCount === total;
+  const countPrefix = `${passCount} of ${total} checks passed`;
+  const reasoning = `${countPrefix}. ${result.reasoning}`;
+  return {
+    ...result,
+    pass: computedPass,
+    reasoning
+  };
+}
 function parseCheckResponse(raw) {
-
+  const result = parseResponse(raw, CheckResponseSchema);
+  return reconcileCheckResult(result);
 }
 function parseAskResponse(raw) {
   return parseResponse(raw, AskResponseSchema);
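reconcileCheckResult makes the per-statement verdicts authoritative: the top-level pass is recomputed as the logical AND of the statement results and the reasoning gains a pass-count prefix, so a model that contradicts its own statement list can no longer produce a false positive. For example, given this parsed response:

```ts
const raw = {
  pass: true, // model's (inconsistent) overall verdict
  reasoning: "Everything looks fine.",
  issues: [],
  statements: [
    { statement: "The header is visible", pass: true },
    { statement: "The submit button is enabled", pass: false }
  ]
};

// reconcileCheckResult(raw) yields:
// pass      -> false
// reasoning -> "1 of 2 checks passed. Everything looks fine."
```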
@@ -1140,6 +1231,12 @@ function parseCompareResponse(raw) {
 }

 // src/core/client.ts
+import { zodToJsonSchema } from "zod-to-json-schema";
+function toSchemaOptions(schema) {
+  return {
+    responseSchema: zodToJsonSchema(schema, { target: "openAi" })
+  };
+}
 var PROVIDER_REGISTRY = {
   anthropic: (config) => new AnthropicDriver(config),
   openai: (config) => new OpenAIDriver(config),
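toSchemaOptions relies on zod-to-json-schema's OpenAI-flavored target to emit a schema acceptable to strict structured outputs. A minimal standalone equivalent; the Zod schema here is illustrative, not the package's real CheckResponseSchema:

```ts
import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";

const ExampleResponse = z.object({
  answer: z.string(),
  confidence: z.number()
});

// Matches the options object now threaded through timedSendMessage into
// each driver's sendMessage(images, prompt, options).
const options = {
  responseSchema: zodToJsonSchema(ExampleResponse, { target: "openAi" })
};
```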
@@ -1148,6 +1245,9 @@ var PROVIDER_REGISTRY = {
 function createDriver(provider, config) {
   return PROVIDER_REGISTRY[provider](config);
 }
+var checkSchemaOptions = toSchemaOptions(CheckResponseSchema);
+var askSchemaOptions = toSchemaOptions(AskResponseSchema);
+var compareSchemaOptions = toSchemaOptions(CompareResponseSchema);
 function visualAI(config = {}) {
   const resolvedConfig = resolveConfig(config);
   const driverConfig = {
@@ -1166,7 +1266,7 @@ function visualAI(config = {}) {
       const img = await normalizeImage(image);
       const prompt = buildElementsVisibilityPrompt(elements, visible, options);
       debugLog(resolvedConfig, `${methodName} prompt`, prompt, "prompt");
-      const response = await timedSendMessage(driver, [img], prompt);
+      const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
       debugLog(resolvedConfig, `${methodName} response`, response.text, "response");
       const result = parseCheckResponse(response.text);
       return {
@@ -1185,7 +1285,7 @@ function visualAI(config = {}) {
       const img = await normalizeImage(image);
       const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
       debugLog(resolvedConfig, "check prompt", prompt, "prompt");
-      const response = await timedSendMessage(driver, [img], prompt);
+      const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
       debugLog(resolvedConfig, "check response", response.text, "response");
       const result = parseCheckResponse(response.text);
       return {
@@ -1199,7 +1299,7 @@ function visualAI(config = {}) {
       const img = await normalizeImage(image);
       const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
       debugLog(resolvedConfig, "ask prompt", prompt, "prompt");
-      const response = await timedSendMessage(driver, [img], prompt);
+      const response = await timedSendMessage(driver, [img], prompt, askSchemaOptions);
       debugLog(resolvedConfig, "ask response", response.text, "response");
       const result = parseAskResponse(response.text);
       return {
@@ -1216,7 +1316,7 @@ function visualAI(config = {}) {
         instructions: options?.instructions
       });
       debugLog(resolvedConfig, "compare prompt", prompt, "prompt");
-      const response = await timedSendMessage(driver, [imgA, imgB], prompt);
+      const response = await timedSendMessage(driver, [imgA, imgB], prompt, compareSchemaOptions);
       debugLog(resolvedConfig, "compare response", response.text, "response");
       const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
       const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
@@ -1251,7 +1351,7 @@ function visualAI(config = {}) {
       const img = await normalizeImage(image);
       const prompt = buildAccessibilityPrompt(options);
       debugLog(resolvedConfig, "accessibility prompt", prompt, "prompt");
-      const response = await timedSendMessage(driver, [img], prompt);
+      const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
       debugLog(resolvedConfig, "accessibility response", response.text, "response");
       const result = parseCheckResponse(response.text);
       return {
@@ -1270,7 +1370,7 @@ function visualAI(config = {}) {
       const img = await normalizeImage(image);
       const prompt = buildLayoutPrompt(options);
       debugLog(resolvedConfig, "layout prompt", prompt, "prompt");
-      const response = await timedSendMessage(driver, [img], prompt);
+      const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
       debugLog(resolvedConfig, "layout response", response.text, "response");
       const result = parseCheckResponse(response.text);
       return {
@@ -1284,7 +1384,7 @@ function visualAI(config = {}) {
       const img = await normalizeImage(image);
       const prompt = buildPageLoadPrompt(options);
       debugLog(resolvedConfig, "pageLoad prompt", prompt, "prompt");
-      const response = await timedSendMessage(driver, [img], prompt);
+      const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
       debugLog(resolvedConfig, "pageLoad response", response.text, "response");
       const result = parseCheckResponse(response.text);
       return {
@@ -1298,7 +1398,7 @@ function visualAI(config = {}) {
       const img = await normalizeImage(image);
       const prompt = buildContentPrompt(options);
       debugLog(resolvedConfig, "content prompt", prompt, "prompt");
-      const response = await timedSendMessage(driver, [img], prompt);
+      const response = await timedSendMessage(driver, [img], prompt, checkSchemaOptions);
       debugLog(resolvedConfig, "content response", response.text, "response");
       const result = parseCheckResponse(response.text);
       return {
@@ -1389,6 +1489,7 @@ export {
   VisualAIProviderError,
   VisualAIRateLimitError,
   VisualAIResponseParseError,
+  VisualAITruncationError,
   assertVisualCompareResult,
   assertVisualResult,
   formatCheckResult,