visual-ai-assertions 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,1349 @@
1
// src/constants.ts

/** Supported AI provider identifiers. */
var Provider = {
  ANTHROPIC: "anthropic",
  OPENAI: "openai",
  GOOGLE: "google"
};

/** Known vision-capable model identifiers, grouped by provider. */
var Model = {
  Anthropic: {
    OPUS_4_6: "claude-opus-4-6",
    SONNET_4_6: "claude-sonnet-4-6",
    HAIKU_4_5: "claude-haiku-4-5"
  },
  OpenAI: {
    GPT_5_4: "gpt-5.4",
    GPT_5_4_PRO: "gpt-5.4-pro",
    GPT_5_2: "gpt-5.2",
    GPT_5_MINI: "gpt-5-mini"
  },
  Google: {
    GEMINI_3_1_PRO_PREVIEW: "gemini-3.1-pro-preview",
    GEMINI_3_FLASH_PREVIEW: "gemini-3-flash-preview"
  }
};

/** Model used for each provider when the caller does not choose one. */
var DEFAULT_MODELS = {
  [Provider.ANTHROPIC]: Model.Anthropic.SONNET_4_6,
  [Provider.OPENAI]: Model.OpenAI.GPT_5_MINI,
  [Provider.GOOGLE]: Model.Google.GEMINI_3_FLASH_PREVIEW
};

/** Default completion token budget for a single request. */
var DEFAULT_MAX_TOKENS = 4096;

/** Reverse lookup: known model id -> owning provider id. */
var MODEL_TO_PROVIDER = new Map(
  Object.entries({
    [Provider.ANTHROPIC]: Model.Anthropic,
    [Provider.OPENAI]: Model.OpenAI,
    [Provider.GOOGLE]: Model.Google
  }).flatMap(
    ([provider, models]) => Object.values(models).map((model) => [model, provider])
  )
);

/** Every provider id accepted in configuration. */
var VALID_PROVIDERS = Object.values(Provider);
36
/** Content-quality check identifiers accepted by the content template. */
var Content = {
  // Lorem ipsum, TODO, TBD and similar dummy copy
  PLACEHOLDER_TEXT: "placeholder-text",
  // visible error messages, banners, stack traces, or error codes
  ERROR_MESSAGES: "error-messages",
  // broken-image icons or failed-to-load indicators
  BROKEN_IMAGES: "broken-images",
  // elements that unintentionally cover other content
  OVERLAPPING_ELEMENTS: "overlapping-elements"
};

/** Layout check identifiers accepted by the layout template. */
var Layout = {
  // unintentional element overlap
  OVERLAP: "overlap",
  // content cut off or spilling past its container
  OVERFLOW: "overflow",
  // inconsistent alignment of text, images, and components
  ALIGNMENT: "alignment"
};

/** Accessibility check identifiers accepted by the accessibility template. */
var Accessibility = {
  // insufficient color contrast between text and background
  CONTRAST: "contrast",
  // text cut off, overlapping, too small, or obscured
  READABILITY: "readability",
  // interactive elements not visually distinct
  INTERACTIVE_VISIBILITY: "interactive-visibility"
};
62
+
63
// src/errors.ts

/**
 * Base class for every error raised by this library.
 * Carries a stable machine-readable `code` alongside the human message.
 */
var VisualAIError = class extends Error {
  code;
  constructor(message, code = "VISUAL_AI_ERROR") {
    super(message);
    this.code = code;
    this.name = "VisualAIError";
    // Repair the prototype chain so `instanceof` keeps working for
    // subclasses even under older Error-subclassing transpilation.
    Object.setPrototypeOf(this, new.target.prototype);
  }
};

/** Missing or rejected API credentials (code: AUTH_FAILED). */
var VisualAIAuthError = class extends VisualAIError {
  constructor(message) {
    super(message, "AUTH_FAILED");
    this.name = "VisualAIAuthError";
  }
};

/** HTTP 429 from a provider; `retryAfter` is the suggested wait in seconds, if known. */
var VisualAIRateLimitError = class extends VisualAIError {
  retryAfter;
  constructor(message, retryAfter) {
    super(message, "RATE_LIMITED");
    this.name = "VisualAIRateLimitError";
    this.retryAfter = retryAfter;
  }
};

/** Any other provider-side failure; `statusCode` is the HTTP status, if any. */
var VisualAIProviderError = class extends VisualAIError {
  statusCode;
  constructor(message, statusCode) {
    super(message, "PROVIDER_ERROR");
    this.name = "VisualAIProviderError";
    this.statusCode = statusCode;
  }
};

/** Input image missing, unreadable, or unsupported (code: IMAGE_INVALID). */
var VisualAIImageError = class extends VisualAIError {
  constructor(message) {
    super(message, "IMAGE_INVALID");
    this.name = "VisualAIImageError";
  }
};

/** Model reply could not be parsed; `rawResponse` is kept for debugging. */
var VisualAIResponseParseError = class extends VisualAIError {
  rawResponse;
  constructor(message, rawResponse) {
    super(message, "RESPONSE_PARSE_FAILED");
    this.name = "VisualAIResponseParseError";
    this.rawResponse = rawResponse;
  }
};

/** Invalid or incomplete library configuration (code: CONFIG_INVALID). */
var VisualAIConfigError = class extends VisualAIError {
  constructor(message) {
    super(message, "CONFIG_INVALID");
    this.name = "VisualAIConfigError";
  }
};

/** A visual assertion failed; `result` holds the full check result. */
var VisualAIAssertionError = class extends VisualAIError {
  result;
  constructor(message, result) {
    super(message, "ASSERTION_FAILED");
    this.name = "VisualAIAssertionError";
    this.result = result;
  }
};

/**
 * True when `error` is one of the concrete error classes above.
 * Note: a bare `VisualAIError` (the base class) intentionally returns false.
 */
function isVisualAIKnownError(error) {
  const knownClasses = [
    VisualAIAuthError,
    VisualAIRateLimitError,
    VisualAIProviderError,
    VisualAIImageError,
    VisualAIResponseParseError,
    VisualAIConfigError,
    VisualAIAssertionError
  ];
  return knownClasses.some((cls) => error instanceof cls);
}
126
+
127
// src/core/prompt.ts
// Shared prompt text for the check / ask / compare flows.
// NOTE(review): the wording of these literals is part of the model contract.
// The exact indentation inside the literals could not be recovered from this
// diff rendering — verify byte-level layout against the published package.

// Appended to every prompt to force a raw-JSON reply (no markdown fences, no extra text).
var JSON_INSTRUCTIONS = `
IMPORTANT: You MUST respond with valid JSON only. No markdown, no code blocks, no extra text.
`;
// Field schema for one "issue" entry; shared by the check and ask schemas below.
var ISSUE_SCHEMA_INSTRUCTIONS = `
Each issue must have:
- "priority": "critical" | "major" | "minor"
- "category": "accessibility" | "missing-element" | "layout" | "content" | "styling" | "functionality" | "performance" | "other"
- "description": what the issue is
- "suggestion": how to fix or improve it
`;
// Response schema for check prompts: one overall verdict plus per-statement verdicts.
var CHECK_OUTPUT_SCHEMA = `Respond with a JSON object matching this exact structure:
{
"pass": boolean, // true ONLY if ALL statements are true
"reasoning": string, // brief overall summary (e.g. "3 of 4 checks passed...")
"issues": [...], // list of issues found (empty if all pass)
"statements": [ // one entry per statement, in order
{
"statement": string, // the original statement text
"pass": boolean, // whether this statement is true
"reasoning": string, // explanation for this statement
"confidence": "high" | "medium" | "low"
// high = clearly visible/absent with no ambiguity
// medium = present but partially obscured, small, or borderline
// low = cannot determine with certainty from the screenshot
}
]
}
${ISSUE_SCHEMA_INSTRUCTIONS}

Only include issues for statements that fail. If all statements pass, issues should be an empty array.

Example for a failing check:
{
"pass": false,
"reasoning": "1 of 2 checks failed. The submit button is not visible.",
"issues": [
{ "priority": "major", "category": "missing-element", "description": "Submit button is not visible on the page", "suggestion": "Verify the submit button component is rendered and not hidden by CSS" }
],
"statements": [
{ "statement": "The page header is visible", "pass": true, "reasoning": "Header with logo is clearly visible at the top", "confidence": "high" },
{ "statement": "The submit button is visible", "pass": false, "reasoning": "No submit button found in the visible area of the page", "confidence": "high" }
]
}
${JSON_INSTRUCTIONS}`;
// Response schema for open-ended "ask" prompts: summary plus a prioritized issue list.
var ASK_OUTPUT_SCHEMA = `Respond with a JSON object matching this exact structure:
{
"summary": string, // high-level analysis summary
"issues": [...] // list of issues/findings, can be empty
}
${ISSUE_SCHEMA_INSTRUCTIONS}

Prioritize issues by severity:
- "critical": blocks functionality, breaks accessibility, data loss risk
- "major": significant usability or visual problem
- "minor": cosmetic issue, minor improvement suggestion

Example:
{
"summary": "Found 2 issues: a critical accessibility problem and a minor cosmetic issue.",
"issues": [
{ "priority": "critical", "category": "accessibility", "description": "Submit button has insufficient color contrast", "suggestion": "Increase contrast so text is clearly readable against the background" },
{ "priority": "minor", "category": "content", "description": "Placeholder text 'Lorem ipsum' visible in sidebar", "suggestion": "Replace with actual content or remove the placeholder section" }
]
}
${JSON_INSTRUCTIONS}`;
// Response schema for before/after comparison prompts.
var COMPARE_OUTPUT_SCHEMA = `Respond with a JSON object matching this exact structure:
{
"pass": boolean, // true if no critical or major changes found
"reasoning": string, // overall summary of changes detected
"changes": [ // list of all visual differences detected (empty if images are identical)
{
"description": string, // what changed between the images
"severity": "critical" | "major" | "minor"
// critical = element removed, layout broken, functionality lost
// major = significant visual change that may be unintentional
// minor = small stylistic difference (color, spacing, font)
}
]
}

If the images appear identical, set pass to true, explain in reasoning, and return an empty changes array.
${JSON_INSTRUCTIONS}`;
// Default role line for check prompts; callers may override via options.role.
var DEFAULT_CHECK_ROLE = "You are a visual QA assistant. Evaluate the provided image precisely and objectively.";
// Default role line for ask prompts.
var DEFAULT_ASK_ROLE = "You are a visual QA assistant. Analyze the provided image based on the user's request.";
// Fixed role line for compare prompts (not overridable).
var COMPARE_ROLE = "You are performing a visual regression test. Compare the BEFORE image (baseline) to the AFTER image (current) and identify all visual differences. Flag changes that appear unintentional or problematic.";
// Edge rules always prepended to user instructions in buildComparePrompt.
var COMPARE_EDGE_RULES = [
  "The BEFORE image is the baseline/expected state.",
  "Flag removals and layout changes as higher severity. Color and spacing changes are lower severity unless they break readability.",
  "If the images appear identical, report no changes and pass."
];
218
/**
 * Renders a bulleted "Additional instructions" section from a string list.
 * @param {string[]} instructions - extra rules to append to a prompt
 * @returns {string} the formatted section
 */
function buildInstructionsSection(instructions) {
  const bulletList = instructions.map((rule) => `- ${rule}`).join("\n");
  return `Additional instructions:\n${bulletList}`;
}
221
/**
 * Builds a check-style prompt asking the model to verdict each statement.
 * @param {string|string[]} statements - one statement or a list of them
 * @param {{role?: string, instructions?: string[]}} [options]
 * @returns {string} the assembled prompt
 */
function buildCheckPrompt(statements, options) {
  const list = [].concat(statements);
  const numbered = list.map((statement, index) => `${index + 1}. "${statement}"`).join("\n");
  const parts = [options?.role ?? DEFAULT_CHECK_ROLE];
  if (options?.instructions?.length) {
    parts.push(buildInstructionsSection(options.instructions));
  }
  parts.push(`Statements to evaluate:\n${numbered}`, CHECK_OUTPUT_SCHEMA);
  return parts.join("\n\n");
}
233
/**
 * Builds an open-ended analysis prompt around the user's request.
 * @param {string} userPrompt - what the user wants analyzed
 * @param {{instructions?: string[]}} [options]
 * @returns {string} the assembled prompt
 */
function buildAskPrompt(userPrompt, options) {
  const parts = [DEFAULT_ASK_ROLE];
  const extraRules = options?.instructions;
  if (extraRules && extraRules.length > 0) {
    parts.push(buildInstructionsSection(extraRules));
  }
  parts.push(`User request: ${userPrompt}`, ASK_OUTPUT_SCHEMA);
  return parts.join("\n\n");
}
242
/**
 * Prompt for image-generation models: produce one annotated image that
 * highlights the differences between two screenshots.
 * @returns {string}
 */
function buildAiDiffPrompt() {
  const lines = [
    "You are given two screenshots of the same page or component.",
    "Generate a single annotated image that clearly highlights the visual differences between the first and second images.",
    "- Overlay semi-transparent red rectangles or outlines on areas that changed",
    "- Keep unchanged areas visible but slightly dimmed",
    "- The output image should match the dimensions of the input images",
    "- Focus on meaningful visual differences: layout shifts, missing elements, color changes, text differences"
  ];
  return lines.join("\n");
}

/**
 * Variant of the diff prompt for models that draw via code execution
 * (Python + PIL + matplotlib) instead of native image output.
 * @returns {string}
 */
function buildAiDiffCodeExecutionPrompt() {
  const codeRequirements = [
    "Write Python code using PIL/Pillow for image processing and matplotlib for rendering to accomplish the above.",
    "Your code MUST load both input images and display the result using matplotlib."
  ].join("\n");
  return `${buildAiDiffPrompt()}\n\n${codeRequirements}`;
}
256
/**
 * Builds a visual-regression prompt comparing a baseline screenshot
 * to a current screenshot.
 * @param {{userPrompt?: string, instructions?: string[]}} [options]
 * @returns {string} the assembled prompt
 */
function buildComparePrompt(options) {
  const defaultTask = "Identify all visual differences between the baseline and current screenshot. Flag any changes that appear unintentional or problematic.";
  const task = options?.userPrompt ? `User request: ${options.userPrompt}` : defaultTask;
  // Built-in edge rules always come first; caller instructions follow.
  const rules = [...COMPARE_EDGE_RULES, ...options?.instructions ?? []];
  return [
    COMPARE_ROLE,
    buildInstructionsSection(rules),
    task,
    COMPARE_OUTPUT_SCHEMA
  ].join("\n\n");
}
267
+
268
// src/templates/elements-visibility.ts

/** Role line when asserting that elements ARE visible. */
var ELEMENTS_VISIBLE_ROLE = "Check whether specific UI elements are present and fully visible in this screenshot.";
/** Role line when asserting that elements are NOT visible. */
var ELEMENTS_HIDDEN_ROLE = "Check whether specific UI elements are absent or hidden in this screenshot.";
/** Edge rule: a partially visible element does not count as fully visible. */
var ELEMENTS_VISIBLE_EDGE_RULES = [
  "If an element is partially visible (cut off by screenshot boundary), it is NOT considered fully visible \u2014 the check for that element should fail. Note the partial visibility in your reasoning."
];
/** Edge rule: a partially visible element does not count as hidden. */
var ELEMENTS_HIDDEN_EDGE_RULES = [
  "If an element is partially visible (cut off by screenshot boundary), it is NOT considered hidden \u2014 the check for that element should fail. Note the partial visibility in your reasoning."
];

/**
 * Builds a check prompt asserting each named element is visible
 * (visible=true) or absent (visible=false).
 * @param {string[]} elements - element descriptions to check
 * @param {boolean} visible - expected visibility state
 * @param {{instructions?: string[]}} [options]
 * @returns {string} the assembled prompt
 */
function buildElementsVisibilityPrompt(elements, visible, options) {
  const toStatement = visible
    ? (el) => `The element "${el}" is fully visible on the page`
    : (el) => `The element "${el}" is NOT visible on the page`;
  const defaultRules = visible ? ELEMENTS_VISIBLE_EDGE_RULES : ELEMENTS_HIDDEN_EDGE_RULES;
  const instructions = options?.instructions ? [...defaultRules, ...options.instructions] : defaultRules;
  return buildCheckPrompt(elements.map(toStatement), {
    role: visible ? ELEMENTS_VISIBLE_ROLE : ELEMENTS_HIDDEN_ROLE,
    instructions
  });
}
286
+
287
// src/templates/accessibility.ts

/** All accessibility check ids, used when the caller does not narrow them. */
var ALL_CHECKS = Object.values(Accessibility);
/** Role line for the accessibility template. */
var ACCESSIBILITY_ROLE = "Evaluate this screenshot for visual accessibility. Focus on what you can actually perceive \u2014 apparent contrast levels, text legibility, and visual distinctiveness of interactive elements.";
/** Edge rules always prepended to caller instructions. */
var ACCESSIBILITY_EDGE_RULES = [
  "Do not state specific contrast ratios. Describe contrast as 'appears sufficient' or 'appears low'.",
  "Dark mode and light mode themes are both valid. Do not flag a valid dark theme as a contrast issue."
];
/** Statement text evaluated for each accessibility check id. */
var CHECK_STATEMENTS = {
  [Accessibility.CONTRAST]: "All text and interactive elements appear to have sufficient color contrast \u2014 text is clearly readable against its background",
  [Accessibility.READABILITY]: "All text is readable \u2014 no text is cut off, overlapping, too small to read, or obscured by background images",
  [Accessibility.INTERACTIVE_VISIBILITY]: "All interactive elements (buttons, links, inputs) are clearly identifiable and visually distinct from non-interactive content"
};

/**
 * Builds an accessibility check prompt for the requested checks
 * (defaults to all of them).
 * @param {{checks?: string[], instructions?: string[]}} [options]
 * @returns {string} the assembled prompt
 */
function buildAccessibilityPrompt(options) {
  const selected = options?.checks ?? [...ALL_CHECKS];
  const extra = options?.instructions ?? [];
  return buildCheckPrompt(selected.map((check) => CHECK_STATEMENTS[check]), {
    role: ACCESSIBILITY_ROLE,
    instructions: [...ACCESSIBILITY_EDGE_RULES, ...extra]
  });
}
308
+
309
// src/templates/layout.ts

/** All layout check ids, used when the caller does not narrow them. */
var ALL_CHECKS2 = Object.values(Layout);
/** Role line for the layout template. */
var LAYOUT_ROLE = "Evaluate this screenshot for visual layout problems \u2014 overlapping elements, content that appears cut off or overflowing, and inconsistent alignment patterns.";
/** Edge rules always prepended to caller instructions. */
var LAYOUT_EDGE_RULES = [
  "Intentional overlaps (stacked avatars, dropdown menus, overlapping cards) are acceptable. Flag only overlaps that obscure content or appear broken.",
  "Scrollable containers with partially visible content are not overflow issues."
];
/** Statement text evaluated for each layout check id. */
var CHECK_STATEMENTS2 = {
  [Layout.OVERLAP]: "No elements overlap each other unintentionally \u2014 all content is clearly separated and readable",
  [Layout.OVERFLOW]: "No content appears to be unintentionally cut off or extending beyond its container boundaries",
  [Layout.ALIGNMENT]: "Elements are properly aligned \u2014 text, images, and UI components follow a consistent grid or alignment pattern"
};

/**
 * Builds a layout check prompt for the requested checks (defaults to all).
 * @param {{checks?: string[], instructions?: string[]}} [options]
 * @returns {string} the assembled prompt
 */
function buildLayoutPrompt(options) {
  const selected = options?.checks ?? [...ALL_CHECKS2];
  const extra = options?.instructions ?? [];
  return buildCheckPrompt(selected.map((check) => CHECK_STATEMENTS2[check]), {
    role: LAYOUT_ROLE,
    instructions: [...LAYOUT_EDGE_RULES, ...extra]
  });
}
327
+
328
// src/templates/page-load.ts

/** Role line for the page-load template. */
var PAGE_LOAD_ROLE = "Evaluate whether this page has finished loading. Look for loading indicators, empty content areas, and missing resources.";

/**
 * Builds a prompt checking that the page is fully loaded (default), or,
 * with `expectLoaded: false`, that it is still in a loading state.
 * @param {{expectLoaded?: boolean, instructions?: string[]}} [options]
 * @returns {string} the assembled prompt
 */
function buildPageLoadPrompt(options) {
  const loadedStatements = [
    "The page content has finished loading \u2014 no spinning indicators, skeleton placeholders, or progress bars are visible",
    "The main content area has actual content displayed (not blank or empty)",
    "No broken image icons or missing resource indicators are visible"
  ];
  const loadingStatements = [
    "The page shows a loading state \u2014 loading spinners, skeleton screens, or progress indicators are visible"
  ];
  const statements = (options?.expectLoaded ?? true) ? loadedStatements : loadingStatements;
  return buildCheckPrompt(statements, {
    role: PAGE_LOAD_ROLE,
    instructions: options?.instructions
  });
}
344
+
345
// src/templates/content.ts

/** All content check ids, used when the caller does not narrow them. */
var ALL_CHECKS3 = Object.values(Content);
/** Role line for the content-quality template. */
var CONTENT_ROLE = "Evaluate this screenshot for content quality problems \u2014 placeholder or dummy content, error states, and broken resources that should not appear in a production UI.";
/** Statement text evaluated for each content check id. */
var CHECK_STATEMENTS3 = {
  [Content.PLACEHOLDER_TEXT]: "No placeholder text like 'Lorem ipsum', 'TODO', 'TBD', 'placeholder', or similar dummy content is visible on the page",
  [Content.ERROR_MESSAGES]: "No error messages, error banners, stack traces, or error codes are visible on the page",
  [Content.BROKEN_IMAGES]: "No broken image icons, missing image placeholders, or failed-to-load image indicators are visible",
  [Content.OVERLAPPING_ELEMENTS]: "No UI elements are unintentionally overlapping each other, obscuring text, buttons, or other interactive content"
};

/**
 * Builds a content-quality check prompt for the requested checks
 * (defaults to all).
 * @param {{checks?: string[], instructions?: string[]}} [options]
 * @returns {string} the assembled prompt
 */
function buildContentPrompt(options) {
  const selected = options?.checks ?? [...ALL_CHECKS3];
  return buildCheckPrompt(selected.map((check) => CHECK_STATEMENTS3[check]), {
    role: CONTENT_ROLE,
    instructions: options?.instructions
  });
}
362
+
363
// src/providers/error-mapper.ts

/**
 * Normalizes an arbitrary SDK/provider throwable into one of this library's
 * error classes, keyed off the HTTP status when available.
 * @param {unknown} err - whatever the provider SDK threw
 * @returns {VisualAIError} the mapped error (this function never throws)
 */
function mapProviderError(err) {
  if (!(err instanceof Error)) {
    // Non-Error throwables (strings, plain objects) are stringified.
    return new VisualAIProviderError(String(err));
  }
  const status = err.status;
  switch (status) {
    case 401:
    case 403:
      return new VisualAIAuthError(err.message);
    case 429: {
      const retryAfter = parseRetryAfter(err.headers?.["retry-after"]);
      return new VisualAIRateLimitError(err.message, retryAfter);
    }
    default:
      // Carry the status through when present; otherwise a bare provider error.
      return status !== void 0
        ? new VisualAIProviderError(err.message, status)
        : new VisualAIProviderError(err.message);
  }
}
382
/**
 * Parses a Retry-After header value into seconds.
 *
 * RFC 9110 allows two forms: delta-seconds (e.g. "120") and an HTTP-date
 * (e.g. "Wed, 21 Oct 2026 07:28:00 GMT"). The previous implementation only
 * handled delta-seconds and returned undefined for HTTP-dates; dates are now
 * converted to a non-negative seconds-from-now delta.
 *
 * @param {string|undefined} value - raw header value
 * @returns {number|undefined} seconds to wait, or undefined when unparseable
 */
function parseRetryAfter(value) {
  if (!value) return void 0;
  const seconds = Number(value);
  // Delta-seconds form (unchanged from the original behavior).
  if (Number.isFinite(seconds)) return seconds;
  // HTTP-date form: convert to a delta; clamp past dates to 0 (retry now).
  const dateMs = Date.parse(value);
  if (Number.isNaN(dateMs)) return void 0;
  return Math.max(0, (dateMs - Date.now()) / 1e3);
}
387
+
388
// src/providers/anthropic.ts

/**
 * Driver for the Anthropic Messages API. The SDK is imported lazily so the
 * dependency stays optional; the client is created once and cached.
 */
var AnthropicDriver = class {
  client;
  model;
  maxTokens;
  apiKeyOrEnv;
  reasoningEffort;
  constructor(config) {
    this.model = config.model;
    this.maxTokens = config.maxTokens;
    this.client = null;
    this.apiKeyOrEnv = config.apiKey;
    this.reasoningEffort = config.reasoningEffort;
  }
  /**
   * Lazily imports the SDK, resolves the API key, and caches the client.
   * @throws {VisualAIConfigError} when @anthropic-ai/sdk is not installed
   * @throws {VisualAIAuthError} when no API key can be resolved
   */
  async getClient() {
    if (this.client) return this.client;
    let Anthropic;
    try {
      ({ default: Anthropic } = await import("@anthropic-ai/sdk"));
    } catch {
      throw new VisualAIConfigError(
        "Anthropic SDK not installed. Run: npm install @anthropic-ai/sdk"
      );
    }
    const apiKey = this.apiKeyOrEnv ?? process.env.ANTHROPIC_API_KEY;
    if (!apiKey) {
      throw new VisualAIAuthError(
        "Anthropic API key not found. Set ANTHROPIC_API_KEY or pass apiKey in config."
      );
    }
    this.client = new Anthropic({ apiKey });
    return this.client;
  }
  /**
   * Sends images plus a text prompt and returns { text, usage }.
   * Provider failures are normalized through mapProviderError.
   * @param {{base64: string, mimeType: string}[]} images
   * @param {string} prompt
   */
  async sendMessage(images, prompt) {
    const client = await this.getClient();
    const imageBlocks = images.map(({ mimeType, base64 }) => ({
      type: "image",
      source: { type: "base64", media_type: mimeType, data: base64 }
    }));
    try {
      const requestParams = {
        model: this.model,
        max_tokens: this.maxTokens,
        messages: [
          {
            role: "user",
            content: [...imageBlocks, { type: "text", text: prompt }]
          }
        ],
        // Adaptive thinking is only enabled when an effort level is configured;
        // this API spells the top tier "max" rather than "xhigh".
        ...this.reasoningEffort && {
          thinking: { type: "adaptive" },
          output_config: {
            effort: this.reasoningEffort === "xhigh" ? "max" : this.reasoningEffort
          }
        }
      };
      const message = await client.messages.create(requestParams);
      const text = message.content.find((block) => block.type === "text")?.text ?? "";
      return {
        text,
        usage: {
          inputTokens: message.usage.input_tokens,
          outputTokens: message.usage.output_tokens
        }
      };
    } catch (err) {
      throw mapProviderError(err);
    }
  }
};
464
+
465
// src/providers/google.ts

/** Model used for image generation when the caller does not pick one. */
var DEFAULT_IMAGE_GEN_MODEL = "gemini-2.5-flash-image";

/**
 * Gemini models with major version >= 3 generate images through the
 * code-execution tool rather than native image response modalities.
 * @param {string} model - model identifier, e.g. "gemini-3-flash-preview"
 * @returns {boolean} true when the code-execution path is required
 */
function needsCodeExecution(model) {
  const versionMatch = /^gemini-(\d+)/.exec(model);
  if (!versionMatch || versionMatch[1] === void 0) return false;
  return Number.parseInt(versionMatch[1], 10) >= 3;
}

/** Thinking-token budgets per reasoning effort level for Gemini models. */
var GOOGLE_THINKING_BUDGET = {
  low: 1024,
  medium: 8192,
  high: 24576,
  xhigh: 24576
};
477
/**
 * Driver for the Google GenAI (Gemini) API. The SDK is imported lazily so
 * the dependency stays optional; the client is created once and cached.
 */
var GoogleDriver = class {
  // Lazily-created @google/genai client (null until first use).
  client;
  // Model id for text/vision calls.
  model;
  // Completion token budget per request.
  maxTokens;
  // Explicit API key from config; falls back to GOOGLE_API_KEY env at connect time.
  apiKeyOrEnv;
  // Optional "low" | "medium" | "high" | "xhigh" reasoning effort.
  reasoningEffort;
  constructor(config) {
    this.model = config.model;
    this.maxTokens = config.maxTokens;
    this.client = null;
    this.apiKeyOrEnv = config.apiKey;
    this.reasoningEffort = config.reasoningEffort;
  }
  /** Converts { base64, mimeType } image records into Gemini inlineData parts. */
  toGeminiParts(images) {
    return images.map((img) => ({
      inlineData: { data: img.base64, mimeType: img.mimeType }
    }));
  }
  /**
   * Lazily imports the SDK, resolves the API key, and caches the client.
   * Throws VisualAIConfigError when @google/genai is not installed and
   * VisualAIAuthError when no API key can be resolved.
   */
  async getClient() {
    if (this.client) return this.client;
    let GoogleGenAI;
    try {
      // Dynamic import keeps the SDK an optional dependency.
      const mod = await import("@google/genai");
      GoogleGenAI = mod.GoogleGenAI;
    } catch {
      throw new VisualAIConfigError(
        "Google GenAI SDK not installed. Run: npm install @google/genai"
      );
    }
    const apiKey = this.apiKeyOrEnv ?? process.env.GOOGLE_API_KEY;
    if (!apiKey) {
      throw new VisualAIAuthError(
        "Google API key not found. Set GOOGLE_API_KEY or pass apiKey in config."
      );
    }
    this.client = new GoogleGenAI({ apiKey });
    return this.client;
  }
  /**
   * Sends images plus a text prompt and returns { text, usage }.
   * Requests a JSON response and, when reasoningEffort is set, a thinking
   * budget from GOOGLE_THINKING_BUDGET. Failures are normalized via
   * mapProviderError.
   */
  async sendMessage(images, prompt) {
    const client = await this.getClient();
    try {
      const response = await client.models.generateContent({
        model: this.model,
        contents: [...this.toGeminiParts(images), prompt],
        config: {
          responseMimeType: "application/json",
          maxOutputTokens: this.maxTokens,
          // Conditional spread: thinkingConfig is added only when an effort is set.
          ...this.reasoningEffort && {
            thinkingConfig: {
              thinkingBudget: GOOGLE_THINKING_BUDGET[this.reasoningEffort]
            }
          }
        }
      });
      const text = response.text ?? "";
      return {
        text,
        usage: response.usageMetadata ? {
          inputTokens: response.usageMetadata.promptTokenCount ?? 0,
          outputTokens: response.usageMetadata.candidatesTokenCount ?? 0
        } : void 0
      };
    } catch (err) {
      throw mapProviderError(err);
    }
  }
  /**
   * Generates an annotated image from the input images plus a prompt and
   * returns { imageData: Buffer, mimeType, usage }.
   * Gemini 3+ image models draw via the code-execution tool (see
   * needsCodeExecution); for those, an "ai-diff" prompt is swapped for its
   * code-execution variant. Older models use TEXT+IMAGE response modalities.
   * Throws VisualAIProviderError when the response carries no image data.
   */
  async generateImage(images, prompt, options) {
    const client = await this.getClient();
    const imageModel = options?.model ?? DEFAULT_IMAGE_GEN_MODEL;
    const resolvedPrompt = options?.promptKind === "ai-diff" && needsCodeExecution(imageModel) ? buildAiDiffCodeExecutionPrompt() : prompt;
    const config = needsCodeExecution(imageModel) ? { tools: [{ codeExecution: {} }] } : { responseModalities: ["TEXT", "IMAGE"] };
    try {
      const response = await client.models.generateContent({
        model: imageModel,
        contents: [...this.toGeminiParts(images), resolvedPrompt],
        config
      });
      const parts = response.candidates?.[0]?.content?.parts;
      if (!parts) {
        throw new VisualAIProviderError("Gemini image generation returned no response parts");
      }
      // First part that actually carries inline image bytes.
      const imagePart = parts.find((p) => p.inlineData?.data);
      if (!imagePart?.inlineData) {
        throw new VisualAIProviderError(
          "Gemini image generation returned no image data. Ensure the model supports image output."
        );
      }
      return {
        imageData: Buffer.from(imagePart.inlineData.data, "base64"),
        mimeType: imagePart.inlineData.mimeType,
        usage: response.usageMetadata ? {
          inputTokens: response.usageMetadata.promptTokenCount ?? 0,
          outputTokens: response.usageMetadata.candidatesTokenCount ?? 0
        } : void 0
      };
    } catch (err) {
      // Preserve our own provider errors verbatim; wrap everything else.
      if (err instanceof VisualAIProviderError) throw err;
      throw mapProviderError(err);
    }
  }
};
578
+
579
// src/providers/openai.ts

/**
 * Driver for the OpenAI Responses API. The SDK is imported lazily so the
 * dependency stays optional; the client is created once and cached.
 */
var OpenAIDriver = class {
  client;
  model;
  maxTokens;
  apiKeyOrEnv;
  reasoningEffort;
  constructor(config) {
    this.model = config.model;
    this.maxTokens = config.maxTokens;
    this.client = null;
    this.apiKeyOrEnv = config.apiKey;
    this.reasoningEffort = config.reasoningEffort;
  }
  /**
   * Lazily imports the SDK, resolves the API key, and caches the client.
   * @throws {VisualAIConfigError} when the openai package is not installed
   * @throws {VisualAIAuthError} when no API key can be resolved
   */
  async getClient() {
    if (this.client) return this.client;
    let OpenAI;
    try {
      ({ default: OpenAI } = await import("openai"));
    } catch {
      throw new VisualAIConfigError("OpenAI SDK not installed. Run: npm install openai");
    }
    const apiKey = this.apiKeyOrEnv ?? process.env.OPENAI_API_KEY;
    if (!apiKey) {
      throw new VisualAIAuthError(
        "OpenAI API key not found. Set OPENAI_API_KEY or pass apiKey in config."
      );
    }
    this.client = new OpenAI({ apiKey });
    return this.client;
  }
  /**
   * Sends images plus a text prompt and returns { text, usage }.
   * Requests JSON output; provider failures go through mapProviderError.
   * @param {{base64: string, mimeType: string}[]} images
   * @param {string} prompt
   */
  async sendMessage(images, prompt) {
    const client = await this.getClient();
    const imageBlocks = images.map(({ mimeType, base64 }) => ({
      type: "input_image",
      image_url: `data:${mimeType};base64,${base64}`
    }));
    try {
      const requestParams = {
        model: this.model,
        max_output_tokens: this.maxTokens,
        text: { format: { type: "json_object" } },
        input: [
          {
            role: "user",
            content: [...imageBlocks, { type: "input_text", text: prompt }]
          }
        ],
        // Reasoning effort is forwarded verbatim when configured.
        ...this.reasoningEffort && { reasoning: { effort: this.reasoningEffort } }
      };
      const response = await client.responses.create(requestParams);
      return {
        text: response.output_text ?? "",
        usage: response.usage ? {
          inputTokens: response.usage.input_tokens,
          outputTokens: response.usage.output_tokens
        } : void 0
      };
    } catch (err) {
      throw mapProviderError(err);
    }
  }
};
646
+
647
// src/core/config.ts

/** Fallback prefix heuristics for model ids not in MODEL_TO_PROVIDER. */
var MODEL_PREFIX_TO_PROVIDER = [
  ["claude-", "anthropic"],
  ["gpt-", "openai"],
  ["o1-", "openai"],
  ["o3-", "openai"],
  ["o4-", "openai"],
  ["gemini-", "google"]
];

/**
 * Infers the provider for a model id: exact lookup first, then by prefix.
 * @param {string} model - model identifier
 * @returns {string|undefined} provider id, or undefined when unrecognized
 */
function inferProviderFromModel(model) {
  const exact = MODEL_TO_PROVIDER.get(model);
  if (exact) return exact;
  for (const [prefix, provider] of MODEL_PREFIX_TO_PROVIDER) {
    if (model.startsWith(prefix)) return provider;
  }
  return void 0;
}
662
/**
 * Resolves which provider to use, in priority order: explicit config,
 * VISUAL_AI_PROVIDER env, inference from the model id, then presence of a
 * provider API key env variable.
 * @param {object} config - user config ({ provider?, model?, ... })
 * @returns {string} a valid provider id
 * @throws {VisualAIConfigError} on an unknown provider or when none is found
 */
function resolveProvider(config) {
  if (config.provider) {
    if (!VALID_PROVIDERS.includes(config.provider)) {
      throw new VisualAIConfigError(
        `Unknown provider: "${config.provider}". Supported: ${VALID_PROVIDERS.join(", ")}`
      );
    }
    return config.provider;
  }
  const envProvider = process.env.VISUAL_AI_PROVIDER;
  if (envProvider) {
    if (!VALID_PROVIDERS.includes(envProvider)) {
      throw new VisualAIConfigError(
        `Invalid VISUAL_AI_PROVIDER: "${envProvider}". Supported: ${VALID_PROVIDERS.join(", ")}`
      );
    }
    return envProvider;
  }
  const model = config.model ?? process.env.VISUAL_AI_MODEL;
  const inferred = model ? inferProviderFromModel(model) : void 0;
  if (inferred) return inferred;
  // Last resort: pick the provider whose API key is present in the environment.
  const apiKeyProviderMap = [
    ["ANTHROPIC_API_KEY", "anthropic"],
    ["OPENAI_API_KEY", "openai"],
    ["GOOGLE_API_KEY", "google"]
  ];
  for (const [envKey, provider] of apiKeyProviderMap) {
    if (process.env[envKey]) return provider;
  }
  throw new VisualAIConfigError(
    "No provider specified. Set `provider` in config, `VISUAL_AI_PROVIDER` env variable, or an API key env variable (ANTHROPIC_API_KEY, OPENAI_API_KEY, GOOGLE_API_KEY)."
  );
}
696
/**
 * Parses a boolean-ish env var: "true"/"1" -> true, "false"/"0" -> false
 * (case-insensitive). Unset or empty values yield undefined.
 * @param {string} envName - variable name, used only in the error message
 * @param {string|undefined} value - raw env value
 * @returns {boolean|undefined}
 * @throws {VisualAIConfigError} on any other value
 */
function parseBooleanEnv(envName, value) {
  if (value === void 0 || value === "") return void 0;
  switch (value.toLowerCase()) {
    case "true":
    case "1":
      return true;
    case "false":
    case "0":
      return false;
    default:
      throw new VisualAIConfigError(
        `Invalid ${envName} value: "${value}". Use "true", "1", "false", or "0".`
      );
  }
}
705
/**
 * Produces the fully-resolved runtime configuration: provider, model,
 * token budget, and debug/usage flags (env-driven when not set in config).
 * @param {object} config - raw user config
 * @returns {object} resolved config
 * @throws {VisualAIConfigError} when the model belongs to a different provider
 */
function resolveConfig(config) {
  const provider = resolveProvider(config);
  const model = config.model ?? process.env.VISUAL_AI_MODEL ?? DEFAULT_MODELS[provider];
  // Guard against mismatches like provider "openai" with model "claude-...".
  const modelProvider = inferProviderFromModel(model);
  if (modelProvider && modelProvider !== provider) {
    throw new VisualAIConfigError(
      `Model "${model}" appears to be a ${modelProvider} model, but provider is "${provider}". Either change the provider or use a ${provider}-compatible model.`
    );
  }
  const debug = config.debug ?? parseBooleanEnv("VISUAL_AI_DEBUG", process.env.VISUAL_AI_DEBUG) ?? false;
  const trackUsage = config.trackUsage ?? parseBooleanEnv("VISUAL_AI_TRACK_USAGE", process.env.VISUAL_AI_TRACK_USAGE) ?? false;
  return {
    provider,
    apiKey: config.apiKey,
    model,
    maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
    reasoningEffort: config.reasoningEffort,
    debug,
    trackUsage
  };
}
724
+
725
// src/core/pricing.ts
// Conversion factor: published model prices are quoted per million tokens.
var PER_MILLION = 1e6;
// Per-token prices keyed by "<provider>:<model>". Values are derived from
// per-million-token list prices (presumably USD — TODO confirm against each
// provider's current pricing page; these figures go stale).
var PRICING_TABLE = {
  [`${Provider.ANTHROPIC}:${Model.Anthropic.OPUS_4_6}`]: {
    inputPricePerToken: 5 / PER_MILLION,
    outputPricePerToken: 25 / PER_MILLION
  },
  [`${Provider.ANTHROPIC}:${Model.Anthropic.SONNET_4_6}`]: {
    inputPricePerToken: 3 / PER_MILLION,
    outputPricePerToken: 15 / PER_MILLION
  },
  [`${Provider.ANTHROPIC}:${Model.Anthropic.HAIKU_4_5}`]: {
    inputPricePerToken: 1 / PER_MILLION,
    outputPricePerToken: 5 / PER_MILLION
  },
  [`${Provider.OPENAI}:${Model.OpenAI.GPT_5_4}`]: {
    inputPricePerToken: 2.5 / PER_MILLION,
    outputPricePerToken: 15 / PER_MILLION
  },
  [`${Provider.OPENAI}:${Model.OpenAI.GPT_5_4_PRO}`]: {
    inputPricePerToken: 30 / PER_MILLION,
    outputPricePerToken: 180 / PER_MILLION
  },
  [`${Provider.OPENAI}:${Model.OpenAI.GPT_5_2}`]: {
    inputPricePerToken: 1.75 / PER_MILLION,
    outputPricePerToken: 14 / PER_MILLION
  },
  [`${Provider.OPENAI}:${Model.OpenAI.GPT_5_MINI}`]: {
    inputPricePerToken: 0.25 / PER_MILLION,
    outputPricePerToken: 2 / PER_MILLION
  },
  [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_1_PRO_PREVIEW}`]: {
    inputPricePerToken: 2 / PER_MILLION,
    outputPricePerToken: 12 / PER_MILLION
  },
  [`${Provider.GOOGLE}:${Model.Google.GEMINI_3_FLASH_PREVIEW}`]: {
    inputPricePerToken: 0.5 / PER_MILLION,
    outputPricePerToken: 3 / PER_MILLION
  }
};
765
/**
 * Estimates the cost of a request from its token counts.
 * Returns undefined when the provider/model pair has no pricing entry.
 */
function calculateCost(provider, model, inputTokens, outputTokens) {
  const entry = PRICING_TABLE[`${provider}:${model}`];
  if (entry === undefined) {
    return undefined;
  }
  const inputCost = inputTokens * entry.inputPricePerToken;
  const outputCost = outputTokens * entry.outputPricePerToken;
  return inputCost + outputCost;
}
771
+
772
+ // src/core/debug.ts
773
/** Writes one labeled debug line to stderr when debug mode is on; no-op otherwise. */
function debugLog(config, label, data) {
  if (!config.debug) {
    return;
  }
  process.stderr.write(`[visual-ai-assertions] ${label}: ${data}
`);
}
779
/**
 * Emits a one-line token/cost summary to stderr when usage tracking is on.
 * Cost renders as "unknown" when no pricing was available; a missing
 * duration falls back to "0.000".
 */
function usageLog(config, method, usage) {
  if (!config.trackUsage) {
    return;
  }
  let cost = "unknown";
  if (usage.estimatedCost !== void 0) {
    cost = `$${usage.estimatedCost.toFixed(6)}`;
  }
  const seconds = usage.durationSeconds?.toFixed(3) ?? "0.000";
  const line = `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output tokens (${cost}) in ${seconds}s [${config.model}]\n`;
  process.stderr.write(line);
}
787
/**
 * Builds the UsageInfo record for one API call (token counts, estimated
 * cost, wall-clock duration), logs it via usageLog, and returns it.
 */
function processUsage(method, rawUsage, durationSeconds, config) {
  // Providers may omit usage data entirely; treat missing counts as zero.
  const input = rawUsage?.inputTokens ?? 0;
  const output = rawUsage?.outputTokens ?? 0;
  const usage = {
    inputTokens: input,
    outputTokens: output,
    estimatedCost: calculateCost(config.provider, config.model, input, output),
    durationSeconds
  };
  // Side effect: prints the summary when trackUsage is enabled.
  usageLog(config, method, usage);
  return usage;
}
799
/**
 * Wraps driver.sendMessage, measuring wall-clock time and attaching it to
 * the response as `durationSeconds`.
 */
async function timedSendMessage(driver, images, prompt) {
  const startedAt = performance.now();
  const response = await driver.sendMessage(images, prompt);
  const elapsedMs = performance.now() - startedAt;
  return { ...response, durationSeconds: elapsedMs / 1e3 };
}
805
+
806
+ // src/core/diff.ts
807
+ import sharp from "sharp";
808
/**
 * Produces an AI-annotated visual diff of two images via the driver's
 * image-generation capability, normalizing the output to PNG.
 * Throws VisualAIConfigError unless the driver exposes generateImage and the
 * configured model is Google's gemini-3-flash-preview.
 */
async function generateAiDiff(imgA, imgB, model, driver) {
  if (!driver.generateImage) {
    throw new VisualAIConfigError(
      "AI-generated diff images require a provider that supports image generation. Currently only the Google (Gemini) provider supports this."
    );
  }
  if (model !== Model.Google.GEMINI_3_FLASH_PREVIEW) {
    throw new VisualAIConfigError(
      "Annotated diff images are only supported when visualAI is configured with the Google model gemini-3-flash-preview."
    );
  }
  const generated = await driver.generateImage([imgA, imgB], buildAiDiffPrompt(), {
    model,
    promptKind: "ai-diff"
  });
  // Re-encode whatever format the model produced as PNG and read dimensions.
  const pipeline = sharp(generated.imageData);
  const meta = await pipeline.metadata();
  const pngBuffer = await pipeline.png().toBuffer();
  return {
    data: pngBuffer,
    width: meta.width ?? 0,
    height: meta.height ?? 0,
    mimeType: "image/png"
  };
}
833
+
834
+ // src/core/image.ts
835
+ import { readFile } from "fs/promises";
836
+ import { extname } from "path";
837
+ import sharp2 from "sharp";
838
// Image MIME types this library accepts for upload to providers.
var SUPPORTED_FORMATS = /* @__PURE__ */ new Set([
  "image/jpeg",
  "image/png",
  "image/webp",
  "image/gif"
]);
// Lowercase file extension -> MIME type, used when loading local files.
var EXTENSION_TO_MIME = {
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".png": "image/png",
  ".webp": "image/webp",
  ".gif": "image/gif"
};
// Images larger than this on either axis (pixels) are downscaled before
// upload (presumably matches a provider's recommended maximum — TODO confirm).
var MAX_DIMENSION = 1568;
// Abort remote image fetches after this many milliseconds.
var URL_FETCH_TIMEOUT_MS = 1e4;
853
/** True when `value` is a MIME type accepted by this library. */
function isSupportedMimeType(value) {
  return SUPPORTED_FORMATS.has(value);
}
/**
 * Looks up the MIME type for a file path by its extension (case-insensitive).
 * Returns undefined for unknown extensions.
 */
function getMimeFromExtension(filePath) {
  const extension = extname(filePath).toLowerCase();
  return EXTENSION_TO_MIME[extension];
}
860
/**
 * Heuristically decides whether a string input refers to a local file path.
 *
 * Recognizes absolute and explicit-relative POSIX paths, anything containing
 * a backslash (Windows paths), and any string ending in a supported image
 * extension — so bare relative names like "screenshots/home.png" work.
 * URL / data-URL / base64 detection runs before this check in normalizeImage,
 * and the base64 alphabet contains no ".", so the extension test cannot
 * misclassify those inputs.
 */
function isFilePath(input) {
  return (
    input.startsWith("/") ||
    input.startsWith("./") ||
    input.startsWith("../") ||
    input.includes("\\") ||
    // Generalization: bare relative paths previously fell through to an
    // "Unrecognized image input" error even though they are unambiguous here.
    /\.(jpe?g|png|webp|gif)$/i.test(input)
  );
}
863
/** True for absolute http(s) URLs; every other scheme is rejected. */
function isUrl(input) {
  return ["http://", "https://"].some((scheme) => input.startsWith(scheme));
}
866
/**
 * True when the string starts with the base64 encoding of a known image
 * signature: "iVBOR" (PNG), "/9j/" (JPEG), "R0lGOD" (GIF), "UklGR" (RIFF/WebP).
 */
function isBase64Image(input) {
  const magicPrefixes = ["iVBOR", "/9j/", "R0lGOD", "UklGR"];
  return magicPrefixes.some((prefix) => input.startsWith(prefix));
}
872
/**
 * Sniffs the image MIME type from leading magic bytes (JPEG, PNG, WebP, GIF).
 * Throws VisualAIImageError when no known signature matches.
 */
function detectMimeType(data) {
  const bytesAt = (offset, bytes) => bytes.every((b, i) => data[offset + i] === b);
  if (bytesAt(0, [0xff, 0xd8, 0xff])) {
    return "image/jpeg";
  }
  if (bytesAt(0, [0x89, 0x50, 0x4e, 0x47])) {
    return "image/png";
  }
  // WebP: "RIFF" container header with "WEBP" at byte offset 8.
  if (bytesAt(0, [0x52, 0x49, 0x46, 0x46]) && bytesAt(8, [0x57, 0x45, 0x42, 0x50])) {
    return "image/webp";
  }
  if (bytesAt(0, [0x47, 0x49, 0x46])) {
    return "image/gif";
  }
  throw new VisualAIImageError("Unable to detect image format from file content");
}
887
/**
 * Downscales an image so neither dimension exceeds MAX_DIMENSION, preserving
 * aspect ratio; returns the input buffer untouched when already small
 * enough. GIFs are always passed through unmodified.
 */
async function resizeIfNeeded(data, mimeType) {
  // GIFs are never resized.
  if (mimeType === "image/gif") {
    return data;
  }
  // Fast path: a PNG stores its dimensions in the IHDR chunk at fixed
  // offsets (width at 16, height at 20), so small PNGs can skip decoding.
  if (mimeType === "image/png" && data.length >= 24) {
    const ihdrWidth = data.readUInt32BE(16);
    const ihdrHeight = data.readUInt32BE(20);
    if (ihdrWidth <= MAX_DIMENSION && ihdrHeight <= MAX_DIMENSION) {
      return data;
    }
  }
  // Slow path: let sharp decode the image to learn its true size.
  const pipeline = sharp2(data);
  const meta = await pipeline.metadata();
  const actualWidth = meta.width ?? 0;
  const actualHeight = meta.height ?? 0;
  if (actualWidth <= MAX_DIMENSION && actualHeight <= MAX_DIMENSION) {
    return data;
  }
  return pipeline
    .resize({
      width: MAX_DIMENSION,
      height: MAX_DIMENSION,
      fit: "inside",
      withoutEnlargement: true
    })
    .toBuffer();
}
912
/**
 * Reads an image from disk, deriving the MIME type from the file extension
 * and falling back to magic-byte sniffing of the content.
 * Throws VisualAIImageError when the file cannot be read.
 */
async function loadFromFilePath(filePath) {
  let contents;
  try {
    contents = await readFile(filePath);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new VisualAIImageError(`Failed to read image file: ${filePath} \u2014 ${reason}`);
  }
  return {
    data: contents,
    mimeType: getMimeFromExtension(filePath) ?? detectMimeType(contents)
  };
}
924
/**
 * Fetches an image over HTTP(S) with a URL_FETCH_TIMEOUT_MS abort timeout.
 * MIME type comes from the Content-Type header when it names a supported
 * format, otherwise from magic-byte sniffing of the body.
 * Throws VisualAIImageError on network failure or a non-2xx status.
 */
async function loadFromUrl(url) {
  let response;
  try {
    response = await fetch(url, { signal: AbortSignal.timeout(URL_FETCH_TIMEOUT_MS) });
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new VisualAIImageError(`Failed to fetch image from URL: ${url} \u2014 ${reason}`);
  }
  if (!response.ok) {
    throw new VisualAIImageError(`Failed to fetch image from URL: ${url} \u2014 HTTP ${response.status}`);
  }
  const body = Buffer.from(await response.arrayBuffer());
  // "image/png; charset=binary" -> "image/png"
  const headerMime = response.headers.get("content-type")?.split(";")[0]?.trim() ?? null;
  const mimeType = headerMime && isSupportedMimeType(headerMime) ? headerMime : detectMimeType(body);
  return { data: body, mimeType };
}
946
/**
 * Decodes a base64 image string, with or without a `data:` URL wrapper.
 * For data URLs the declared MIME type must be a supported image format;
 * bare payloads get their type sniffed from the decoded bytes.
 * Throws VisualAIImageError on malformed input.
 */
function loadFromBase64(input) {
  let payload = input;
  let declaredMime;
  if (input.startsWith("data:")) {
    const parts = /^data:(image\/[^;]+);base64,(.+)$/.exec(input);
    if (!parts?.[1] || !parts[2]) {
      throw new VisualAIImageError("Invalid data URL format");
    }
    if (!isSupportedMimeType(parts[1])) {
      throw new VisualAIImageError(`Unsupported image format: ${parts[1]}`);
    }
    declaredMime = parts[1];
    payload = parts[2];
  }
  // Shape check before decoding — Buffer.from silently skips invalid chars.
  if (!/^[A-Za-z0-9+/\n\r]+=*$/.test(payload)) {
    throw new VisualAIImageError("Invalid base64 string");
  }
  const decoded = Buffer.from(payload, "base64");
  if (decoded.length === 0) {
    throw new VisualAIImageError("Empty image data after base64 decode");
  }
  return { data: decoded, mimeType: declaredMime ?? detectMimeType(decoded) };
}
969
// Normalizes any accepted image input (Buffer, Uint8Array, file path, URL,
// data URL, or bare base64 string) into { data, mimeType, base64 }, resizing
// oversized images first. Throws VisualAIImageError for unrecognized input.
async function normalizeImage(input) {
  let data;
  let mimeType;
  if (Buffer.isBuffer(input)) {
    mimeType = detectMimeType(input);
    data = input;
  } else if (input instanceof Uint8Array) {
    const buf = Buffer.from(input);
    mimeType = detectMimeType(buf);
    data = buf;
  } else if (typeof input === "string") {
    // Dispatch order matters: URLs first, then data URLs, then bare base64
    // (recognized by magic-prefix), then file paths; anything left is an error.
    if (isUrl(input)) {
      ({ data, mimeType } = await loadFromUrl(input));
    } else if (input.startsWith("data:")) {
      ({ data, mimeType } = loadFromBase64(input));
    } else if (isBase64Image(input)) {
      ({ data, mimeType } = loadFromBase64(input));
    } else if (isFilePath(input)) {
      ({ data, mimeType } = await loadFromFilePath(input));
    } else {
      throw new VisualAIImageError(
        `Unrecognized image input: "${input.slice(0, 80)}". Expected a file path, URL, data URL, or base64-encoded image string.`
      );
    }
  } else {
    throw new VisualAIImageError(
      "Invalid image input: expected Buffer, Uint8Array, file path, URL, or base64 string"
    );
  }
  // Downscale before upload so providers never see oversized images.
  data = await resizeIfNeeded(data, mimeType);
  // base64 is computed lazily and memoized — callers that never read it
  // (e.g. drivers that upload raw bytes) skip the encoding cost.
  let cachedBase64;
  return {
    data,
    mimeType,
    get base64() {
      if (cachedBase64 === void 0) {
        cachedBase64 = data.toString("base64");
      }
      return cachedBase64;
    }
  };
}
1011
+
1012
+ // src/types.ts
1013
+ import { z } from "zod";
1014
// Severity levels for reported issues.
var IssuePrioritySchema = z.enum(["critical", "major", "minor"]);
// Broad classification of what kind of problem an issue describes.
var IssueCategorySchema = z.enum([
  "accessibility",
  "missing-element",
  "layout",
  "content",
  "styling",
  "functionality",
  "performance",
  "other"
]);
// A single detected problem plus a suggested remediation.
var IssueSchema = z.object({
  priority: IssuePrioritySchema,
  category: IssueCategorySchema,
  description: z.string(),
  suggestion: z.string()
});
// How confident the model is in a per-statement verdict.
var ConfidenceSchema = z.enum(["high", "medium", "low"]);
// Verdict for one user-supplied statement about the screenshot.
var StatementResultSchema = z.object({
  statement: z.string(),
  pass: z.boolean(),
  reasoning: z.string(),
  confidence: ConfidenceSchema.optional()
});
// Token counts, optional estimated cost, and wall-clock duration of one call.
var UsageInfoSchema = z.object({
  inputTokens: z.number(),
  outputTokens: z.number(),
  estimatedCost: z.number().optional(),
  durationSeconds: z.number().nonnegative().optional()
});
// Fields shared by all pass/fail-style results.
var BaseResultSchema = z.object({
  pass: z.boolean(),
  reasoning: z.string(),
  usage: UsageInfoSchema.optional()
});
// Result of check-style methods: overall verdict plus issues and statements.
var CheckResultSchema = BaseResultSchema.extend({
  issues: z.array(IssueSchema),
  statements: z.array(StatementResultSchema)
});
// One difference found when comparing two images.
var ChangeEntrySchema = z.object({
  description: z.string(),
  severity: IssuePrioritySchema
});
// Result of compare(): verdict plus a bounded list of changes.
var CompareResultSchema = BaseResultSchema.extend({
  changes: z.array(ChangeEntrySchema).max(50)
});
// Result of ask(): free-form summary plus any issues — no pass/fail verdict.
var AskResultSchema = z.object({
  summary: z.string(),
  issues: z.array(IssueSchema),
  usage: UsageInfoSchema.optional()
});
1065
+
1066
+ // src/core/response.ts
1067
/**
 * Removes a surrounding ``` / ```json code fence from a model reply, if
 * present; otherwise returns the trimmed text unchanged.
 */
function stripCodeFences(text) {
  const fencePattern = /^```(?:json)?\s*\n?([\s\S]*?)\n?\s*```$/s;
  const body = text.trim();
  const fenced = fencePattern.exec(body);
  if (fenced?.[1] !== undefined) {
    return fenced[1];
  }
  return body;
}
1072
// Wire-format schemas: what the model itself must return. The local-only
// `usage` field is omitted here and attached by the client after the call.
var CheckResponseSchema = CheckResultSchema.omit({ usage: true });
var AskResponseSchema = AskResultSchema.omit({ usage: true });
var CompareResponseSchema = CompareResultSchema.omit({ usage: true });
1075
/**
 * Parses a raw model reply (possibly wrapped in a code fence) as JSON and
 * validates it against the given zod schema.
 * Throws VisualAIResponseParseError — carrying the raw text — on malformed
 * JSON or a schema mismatch.
 */
function parseResponse(raw, schema) {
  const jsonText = stripCodeFences(raw);
  let candidate;
  try {
    candidate = JSON.parse(jsonText);
  } catch {
    throw new VisualAIResponseParseError(
      `Failed to parse AI response as JSON: ${raw.slice(0, 200)}`,
      raw
    );
  }
  const validation = schema.safeParse(candidate);
  if (validation.success) {
    return validation.data;
  }
  throw new VisualAIResponseParseError(
    `AI response does not match expected schema: ${validation.error.message}`,
    raw
  );
}
1094
// Parses/validates a raw model reply for check-style methods (check, layout,
// accessibility, pageLoad, content, elementsVisible/elementsHidden).
function parseCheckResponse(raw) {
  return parseResponse(raw, CheckResponseSchema);
}
// Parses/validates a raw model reply for ask().
function parseAskResponse(raw) {
  return parseResponse(raw, AskResponseSchema);
}
// Parses/validates a raw model reply for compare().
function parseCompareResponse(raw) {
  return parseResponse(raw, CompareResponseSchema);
}
1103
+
1104
+ // src/core/client.ts
1105
// Factory table mapping a provider id to its driver constructor.
var PROVIDER_REGISTRY = {
  anthropic: (config) => new AnthropicDriver(config),
  openai: (config) => new OpenAIDriver(config),
  google: (config) => new GoogleDriver(config)
};
// Instantiates the driver for an already-validated provider id.
function createDriver(provider, config) {
  return PROVIDER_REGISTRY[provider](config);
}
1113
// Public entry point: resolves configuration, creates the provider driver,
// and returns the client object whose methods each follow the same pipeline:
// normalize image(s) -> build prompt -> timed driver call -> parse/validate
// the reply -> attach usage info. Throws VisualAIConfigError for invalid
// arguments; parse failures surface as VisualAIResponseParseError.
function visualAI(config = {}) {
  const resolvedConfig = resolveConfig(config);
  // Only the driver-relevant subset of the resolved config is handed down.
  const driverConfig = {
    apiKey: resolvedConfig.apiKey,
    model: resolvedConfig.model,
    maxTokens: resolvedConfig.maxTokens,
    reasoningEffort: resolvedConfig.reasoningEffort
  };
  const driver = createDriver(resolvedConfig.provider, driverConfig);
  // Shared implementation behind elementsVisible() / elementsHidden();
  // `visible` selects which of the two checks (and method names) to run.
  async function checkElementsVisibility(image, elements, visible, options) {
    const methodName = visible ? "elementsVisible" : "elementsHidden";
    if (elements.length === 0) {
      throw new VisualAIConfigError(`At least one element is required for ${methodName}()`);
    }
    const img = await normalizeImage(image);
    const prompt = buildElementsVisibilityPrompt(elements, visible, options);
    debugLog(resolvedConfig, `${methodName} prompt`, prompt);
    const response = await timedSendMessage(driver, [img], prompt);
    debugLog(resolvedConfig, `${methodName} response`, response.text);
    const result = parseCheckResponse(response.text);
    return {
      ...result,
      usage: processUsage(methodName, response.usage, response.durationSeconds, resolvedConfig)
    };
  }
  return {
    // Evaluates one or more natural-language statements against a screenshot.
    async check(image, statements, options) {
      // Accept a single statement or an array of them.
      const stmts = Array.isArray(statements) ? statements : [statements];
      if (stmts.length === 0) {
        throw new VisualAIConfigError("At least one statement is required for check()");
      }
      const img = await normalizeImage(image);
      const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
      debugLog(resolvedConfig, "check prompt", prompt);
      const response = await timedSendMessage(driver, [img], prompt);
      debugLog(resolvedConfig, "check response", response.text);
      const result = parseCheckResponse(response.text);
      return {
        ...result,
        usage: processUsage("check", response.usage, response.durationSeconds, resolvedConfig)
      };
    },
    // Free-form question about a screenshot; returns a summary plus issues.
    async ask(image, userPrompt, options) {
      const img = await normalizeImage(image);
      const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
      debugLog(resolvedConfig, "ask prompt", prompt);
      const response = await timedSendMessage(driver, [img], prompt);
      debugLog(resolvedConfig, "ask response", response.text);
      const result = parseAskResponse(response.text);
      return {
        ...result,
        usage: processUsage("ask", response.usage, response.durationSeconds, resolvedConfig)
      };
    },
    // Compares two screenshots, optionally producing an AI-annotated diff image.
    async compare(imageA, imageB, options) {
      const [imgA, imgB] = await Promise.all([normalizeImage(imageA), normalizeImage(imageB)]);
      const prompt = buildComparePrompt({
        userPrompt: options?.prompt,
        instructions: options?.instructions
      });
      debugLog(resolvedConfig, "compare prompt", prompt);
      const response = await timedSendMessage(driver, [imgA, imgB], prompt);
      debugLog(resolvedConfig, "compare response", response.text);
      // Annotated diffs only work on Google's gemini-3-flash-preview; when
      // the caller doesn't specify, default to on where supported.
      const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
      const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
      let diffImage;
      if (effectiveDiffImage) {
        // Diff generation is best-effort: a failure warns but never fails
        // the compare() call itself.
        try {
          diffImage = await generateAiDiff(imgA, imgB, resolvedConfig.model, driver);
        } catch (err) {
          const msg = err instanceof Error ? err.message : String(err);
          debugLog(resolvedConfig, "ai diff error", msg);
          // Avoid double-printing: debugLog already wrote it in debug mode.
          if (!resolvedConfig.debug) {
            process.stderr.write(
              `[visual-ai-assertions] warning: diff generation failed: ${msg}
`
            );
          }
        }
      }
      const result = parseCompareResponse(response.text);
      return {
        ...result,
        ...diffImage ? { diffImage } : {},
        usage: processUsage("compare", response.usage, response.durationSeconds, resolvedConfig)
      };
    },
    // Asserts that all named elements are visible in the screenshot.
    elementsVisible(image, elements, options) {
      return checkElementsVisibility(image, elements, true, options);
    },
    // Asserts that all named elements are absent/hidden in the screenshot.
    elementsHidden(image, elements, options) {
      return checkElementsVisibility(image, elements, false, options);
    },
    // Runs the accessibility audit prompt against the screenshot.
    async accessibility(image, options) {
      const img = await normalizeImage(image);
      const prompt = buildAccessibilityPrompt(options);
      debugLog(resolvedConfig, "accessibility prompt", prompt);
      const response = await timedSendMessage(driver, [img], prompt);
      debugLog(resolvedConfig, "accessibility response", response.text);
      const result = parseCheckResponse(response.text);
      return {
        ...result,
        usage: processUsage(
          "accessibility",
          response.usage,
          response.durationSeconds,
          resolvedConfig
        )
      };
    },
    // Runs the layout-issues prompt against the screenshot.
    async layout(image, options) {
      const img = await normalizeImage(image);
      const prompt = buildLayoutPrompt(options);
      debugLog(resolvedConfig, "layout prompt", prompt);
      const response = await timedSendMessage(driver, [img], prompt);
      debugLog(resolvedConfig, "layout response", response.text);
      const result = parseCheckResponse(response.text);
      return {
        ...result,
        usage: processUsage("layout", response.usage, response.durationSeconds, resolvedConfig)
      };
    },
    // Checks whether the page appears fully loaded (no spinners, errors, etc.).
    async pageLoad(image, options) {
      const img = await normalizeImage(image);
      const prompt = buildPageLoadPrompt(options);
      debugLog(resolvedConfig, "pageLoad prompt", prompt);
      const response = await timedSendMessage(driver, [img], prompt);
      debugLog(resolvedConfig, "pageLoad response", response.text);
      const result = parseCheckResponse(response.text);
      return {
        ...result,
        usage: processUsage("pageLoad", response.usage, response.durationSeconds, resolvedConfig)
      };
    },
    // Runs the content-quality prompt (placeholder text, broken images, ...).
    async content(image, options) {
      const img = await normalizeImage(image);
      const prompt = buildContentPrompt(options);
      debugLog(resolvedConfig, "content prompt", prompt);
      const response = await timedSendMessage(driver, [img], prompt);
      debugLog(resolvedConfig, "content response", response.text);
      const result = parseCheckResponse(response.text);
      return {
        ...result,
        usage: processUsage("content", response.usage, response.durationSeconds, resolvedConfig)
      };
    }
  };
}
1261
+
1262
+ // src/format.ts
1263
/**
 * Renders a CheckResult as a human-readable multi-line report.
 * Passing results show only the header and reasoning; failing results also
 * list per-statement verdicts and detected issues.
 */
function formatCheckResult(result, label) {
  const suffix = label ? ` (${label})` : "";
  const title = result.pass ? `Visual AI Check Passed${suffix}` : `Visual AI Check Failed${suffix}`;
  const lines = [title, "=".repeat(title.length), result.reasoning];
  if (result.pass) {
    return lines.join("\n");
  }
  if (result.statements.length > 0) {
    lines.push("", "Statements:");
    for (const entry of result.statements) {
      const verdict = entry.pass ? "PASS" : "FAIL";
      const conf = entry.confidence ? ` (${entry.confidence})` : "";
      lines.push(`  ${verdict} "${entry.statement}"`, `    ${entry.reasoning}${conf}`);
    }
  }
  if (result.issues.length > 0) {
    lines.push("", "Issues:");
    for (const issue of result.issues) {
      lines.push(
        `  [${issue.priority}/${issue.category}] ${issue.description}`,
        `    \u2192 ${issue.suggestion}`
      );
    }
  }
  return lines.join("\n");
}
1290
/**
 * Renders a CompareResult as a human-readable multi-line report, including
 * the change list and (when present) the AI-generated diff image dimensions.
 */
function formatCompareResult(result, label) {
  const outcome = result.pass ? "Passed" : "Failed";
  let title = `Visual AI Compare ${outcome}`;
  if (label) {
    title += ` (${label})`;
  }
  const out = [title, "=".repeat(title.length), result.reasoning];
  if (result.changes.length > 0) {
    out.push("", "Changes:");
    out.push(...result.changes.map((c) => `  [${c.severity}] ${c.description}`));
  }
  if (result.diffImage) {
    out.push("", `Diff image: ${result.diffImage.width}x${result.diffImage.height} (AI-generated)`);
  }
  return out.join("\n");
}
1306
/**
 * Throws VisualAIAssertionError (message from formatCheckResult, result
 * attached) when a check-style result failed; returns silently on pass.
 */
function assertVisualResult(result, label) {
  if (result.pass) {
    return;
  }
  throw new VisualAIAssertionError(formatCheckResult(result, label), result);
}
1311
/**
 * Throws VisualAIAssertionError (message from formatCompareResult, result
 * attached) when a compare result failed; returns silently on pass.
 */
function assertVisualCompareResult(result, label) {
  if (result.pass) {
    return;
  }
  throw new VisualAIAssertionError(formatCompareResult(result, label), result);
}
1316
// Public API of the package: the visualAI() factory, result formatters and
// assertion helpers, error classes, zod result schemas, and the
// provider/model/check-category constants.
export {
  Accessibility,
  AskResultSchema,
  ChangeEntrySchema,
  CheckResultSchema,
  CompareResultSchema,
  ConfidenceSchema,
  Content,
  DEFAULT_MODELS,
  IssueCategorySchema,
  IssuePrioritySchema,
  IssueSchema,
  Layout,
  Model,
  Provider,
  StatementResultSchema,
  UsageInfoSchema,
  VALID_PROVIDERS,
  VisualAIAssertionError,
  VisualAIAuthError,
  VisualAIConfigError,
  VisualAIError,
  VisualAIImageError,
  VisualAIProviderError,
  VisualAIRateLimitError,
  VisualAIResponseParseError,
  assertVisualCompareResult,
  assertVisualResult,
  formatCheckResult,
  formatCompareResult,
  isVisualAIKnownError,
  visualAI
};
1349
+ //# sourceMappingURL=index.js.map