visual-ai-assertions 0.5.0 → 0.6.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -400,7 +400,12 @@ type ReasoningEffort = "low" | "medium" | "high" | "xhigh";
400
400
  interface VisualAIConfig {
401
401
  apiKey?: string;
402
402
  model?: string;
403
+ /** Enable error diagnostic logging to stderr. Does not enable prompt/response logging — use `debugPrompt` and `debugResponse` for that. */
403
404
  debug?: boolean;
405
+ /** Log prompts to stderr. */
406
+ debugPrompt?: boolean;
407
+ /** Log responses to stderr. */
408
+ debugResponse?: boolean;
404
409
  maxTokens?: number;
405
410
  reasoningEffort?: ReasoningEffort;
406
411
  trackUsage?: boolean;
package/dist/index.d.ts CHANGED
@@ -400,7 +400,12 @@ type ReasoningEffort = "low" | "medium" | "high" | "xhigh";
400
400
  interface VisualAIConfig {
401
401
  apiKey?: string;
402
402
  model?: string;
403
+ /** Enable error diagnostic logging to stderr. Does not enable prompt/response logging — use `debugPrompt` and `debugResponse` for that. */
403
404
  debug?: boolean;
405
+ /** Log prompts to stderr. */
406
+ debugPrompt?: boolean;
407
+ /** Log responses to stderr. */
408
+ debugResponse?: boolean;
404
409
  maxTokens?: number;
405
410
  reasoningEffort?: ReasoningEffort;
406
411
  trackUsage?: boolean;
package/dist/index.js CHANGED
@@ -35,6 +35,11 @@ var MODEL_TO_PROVIDER = new Map([
35
35
  ...Object.values(Model.Google).map((m) => [m, Provider.GOOGLE])
36
36
  ]);
37
37
  var VALID_PROVIDERS = Object.values(Provider);
38
+ var PROVIDER_DEFAULT_REASONING = {
39
+ openai: "medium",
40
+ anthropic: "off",
41
+ google: "off"
42
+ };
38
43
  var Content = {
39
44
  /** Detects Lorem ipsum, TODO, TBD, and similar placeholder text */
40
45
  PLACEHOLDER_TEXT: "placeholder-text",
@@ -687,16 +692,38 @@ function parseBooleanEnv(envName, value) {
687
692
  `Invalid ${envName} value: "${value}". Use "true", "1", "false", or "0".`
688
693
  );
689
694
  }
695
+ var VALID_REASONING_EFFORTS = ["low", "medium", "high", "xhigh"];
696
+ function parseReasoningEffortEnv(envName, value) {
697
+ if (value === void 0 || value === "") return void 0;
698
+ const lower = value.toLowerCase();
699
+ if (VALID_REASONING_EFFORTS.includes(lower)) return lower;
700
+ throw new VisualAIConfigError(
701
+ `Invalid ${envName} value: "${value}". Use "low", "medium", "high", or "xhigh".`
702
+ );
703
+ }
704
+ var debugDeprecationWarned = false;
690
705
  function resolveConfig(config) {
691
706
  const provider = resolveProvider(config);
692
707
  const model = config.model ?? process.env.VISUAL_AI_MODEL ?? DEFAULT_MODELS[provider];
708
+ const debug = config.debug ?? parseBooleanEnv("VISUAL_AI_DEBUG", process.env.VISUAL_AI_DEBUG) ?? false;
709
+ const debugPrompt = config.debugPrompt ?? parseBooleanEnv("VISUAL_AI_DEBUG_PROMPT", process.env.VISUAL_AI_DEBUG_PROMPT) ?? false;
710
+ const debugResponse = config.debugResponse ?? parseBooleanEnv("VISUAL_AI_DEBUG_RESPONSE", process.env.VISUAL_AI_DEBUG_RESPONSE) ?? false;
711
+ if (debug && !debugPrompt && !debugResponse && !debugDeprecationWarned) {
712
+ debugDeprecationWarned = true;
713
+ process.stderr.write(
714
+ `[visual-ai-assertions] Warning: VISUAL_AI_DEBUG no longer enables prompt/response logging. Use VISUAL_AI_DEBUG_PROMPT=true and/or VISUAL_AI_DEBUG_RESPONSE=true instead.
715
+ `
716
+ );
717
+ }
693
718
  return {
694
719
  provider,
695
720
  apiKey: config.apiKey,
696
721
  model,
697
722
  maxTokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
698
- reasoningEffort: config.reasoningEffort,
699
- debug: config.debug ?? parseBooleanEnv("VISUAL_AI_DEBUG", process.env.VISUAL_AI_DEBUG) ?? false,
723
+ reasoningEffort: config.reasoningEffort ?? parseReasoningEffortEnv("VISUAL_AI_REASONING_EFFORT", process.env.VISUAL_AI_REASONING_EFFORT),
724
+ debug,
725
+ debugPrompt,
726
+ debugResponse,
700
727
  trackUsage: config.trackUsage ?? parseBooleanEnv("VISUAL_AI_TRACK_USAGE", process.env.VISUAL_AI_TRACK_USAGE) ?? false
701
728
  };
702
729
  }
@@ -757,8 +784,9 @@ function calculateCost(provider, model, inputTokens, outputTokens) {
757
784
  }
758
785
 
759
786
  // src/core/debug.ts
760
- function debugLog(config, label, data) {
761
- if (config.debug) {
787
+ function debugLog(config, label, data, kind = "error") {
788
+ const enabled = kind === "prompt" ? config.debugPrompt : kind === "response" ? config.debugResponse : config.debug;
789
+ if (enabled) {
762
790
  process.stderr.write(`[visual-ai-assertions] ${label}: ${data}
763
791
  `);
764
792
  }
@@ -766,8 +794,9 @@ function debugLog(config, label, data) {
766
794
  function usageLog(config, method, usage) {
767
795
  if (!config.trackUsage) return;
768
796
  const costStr = usage.estimatedCost !== void 0 ? `$${usage.estimatedCost.toFixed(6)}` : "unknown";
797
+ const reasoningStr = config.reasoningEffort ? `reasoning: ${config.reasoningEffort}` : `reasoning: ${PROVIDER_DEFAULT_REASONING[config.provider]} (provider default)`;
769
798
  process.stderr.write(
770
- `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}]
799
+ `[visual-ai-assertions] ${method} usage: ${usage.inputTokens} input + ${usage.outputTokens} output tokens (${costStr}) in ${usage.durationSeconds?.toFixed(3) ?? "0.000"}s [${config.model}, ${reasoningStr}]
771
800
  `
772
801
  );
773
802
  }
@@ -783,6 +812,28 @@ function processUsage(method, rawUsage, durationSeconds, config) {
783
812
  usageLog(config, method, usage);
784
813
  return usage;
785
814
  }
815
+ var MAX_RAW_RESPONSE_PREVIEW = 500;
816
+ function formatError(error) {
817
+ if (error instanceof VisualAIResponseParseError) {
818
+ const truncated = error.rawResponse.length > MAX_RAW_RESPONSE_PREVIEW ? error.rawResponse.slice(0, MAX_RAW_RESPONSE_PREVIEW) + "..." : error.rawResponse;
819
+ return `${error.name} (${error.code}): ${error.message}. Raw (truncated): ${truncated}`;
820
+ }
821
+ if (error instanceof VisualAIError) {
822
+ return `${error.name} (${error.code}): ${error.message}`;
823
+ }
824
+ if (error instanceof Error) {
825
+ return `${error.name}: ${error.message}`;
826
+ }
827
+ return String(error);
828
+ }
829
+ async function withErrorDebug(config, method, fn) {
830
+ try {
831
+ return await fn();
832
+ } catch (error) {
833
+ debugLog(config, `${method} error`, formatError(error), "error");
834
+ throw error;
835
+ }
836
+ }
786
837
  async function timedSendMessage(driver, images, prompt) {
787
838
  const start = performance.now();
788
839
  const response = await driver.sendMessage(images, prompt);
@@ -1111,16 +1162,18 @@ function visualAI(config = {}) {
1111
1162
  if (elements.length === 0) {
1112
1163
  throw new VisualAIConfigError(`At least one element is required for ${methodName}()`);
1113
1164
  }
1114
- const img = await normalizeImage(image);
1115
- const prompt = buildElementsVisibilityPrompt(elements, visible, options);
1116
- debugLog(resolvedConfig, `${methodName} prompt`, prompt);
1117
- const response = await timedSendMessage(driver, [img], prompt);
1118
- debugLog(resolvedConfig, `${methodName} response`, response.text);
1119
- const result = parseCheckResponse(response.text);
1120
- return {
1121
- ...result,
1122
- usage: processUsage(methodName, response.usage, response.durationSeconds, resolvedConfig)
1123
- };
1165
+ return withErrorDebug(resolvedConfig, methodName, async () => {
1166
+ const img = await normalizeImage(image);
1167
+ const prompt = buildElementsVisibilityPrompt(elements, visible, options);
1168
+ debugLog(resolvedConfig, `${methodName} prompt`, prompt, "prompt");
1169
+ const response = await timedSendMessage(driver, [img], prompt);
1170
+ debugLog(resolvedConfig, `${methodName} response`, response.text, "response");
1171
+ const result = parseCheckResponse(response.text);
1172
+ return {
1173
+ ...result,
1174
+ usage: processUsage(methodName, response.usage, response.durationSeconds, resolvedConfig)
1175
+ };
1176
+ });
1124
1177
  }
1125
1178
  return {
1126
1179
  async check(image, statements, options) {
@@ -1128,61 +1181,64 @@ function visualAI(config = {}) {
1128
1181
  if (stmts.length === 0) {
1129
1182
  throw new VisualAIConfigError("At least one statement is required for check()");
1130
1183
  }
1131
- const img = await normalizeImage(image);
1132
- const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
1133
- debugLog(resolvedConfig, "check prompt", prompt);
1134
- const response = await timedSendMessage(driver, [img], prompt);
1135
- debugLog(resolvedConfig, "check response", response.text);
1136
- const result = parseCheckResponse(response.text);
1137
- return {
1138
- ...result,
1139
- usage: processUsage("check", response.usage, response.durationSeconds, resolvedConfig)
1140
- };
1184
+ return withErrorDebug(resolvedConfig, "check", async () => {
1185
+ const img = await normalizeImage(image);
1186
+ const prompt = buildCheckPrompt(stmts, { instructions: options?.instructions });
1187
+ debugLog(resolvedConfig, "check prompt", prompt, "prompt");
1188
+ const response = await timedSendMessage(driver, [img], prompt);
1189
+ debugLog(resolvedConfig, "check response", response.text, "response");
1190
+ const result = parseCheckResponse(response.text);
1191
+ return {
1192
+ ...result,
1193
+ usage: processUsage("check", response.usage, response.durationSeconds, resolvedConfig)
1194
+ };
1195
+ });
1141
1196
  },
1142
1197
  async ask(image, userPrompt, options) {
1143
- const img = await normalizeImage(image);
1144
- const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
1145
- debugLog(resolvedConfig, "ask prompt", prompt);
1146
- const response = await timedSendMessage(driver, [img], prompt);
1147
- debugLog(resolvedConfig, "ask response", response.text);
1148
- const result = parseAskResponse(response.text);
1149
- return {
1150
- ...result,
1151
- usage: processUsage("ask", response.usage, response.durationSeconds, resolvedConfig)
1152
- };
1198
+ return withErrorDebug(resolvedConfig, "ask", async () => {
1199
+ const img = await normalizeImage(image);
1200
+ const prompt = buildAskPrompt(userPrompt, { instructions: options?.instructions });
1201
+ debugLog(resolvedConfig, "ask prompt", prompt, "prompt");
1202
+ const response = await timedSendMessage(driver, [img], prompt);
1203
+ debugLog(resolvedConfig, "ask response", response.text, "response");
1204
+ const result = parseAskResponse(response.text);
1205
+ return {
1206
+ ...result,
1207
+ usage: processUsage("ask", response.usage, response.durationSeconds, resolvedConfig)
1208
+ };
1209
+ });
1153
1210
  },
1154
1211
  async compare(imageA, imageB, options) {
1155
- const [imgA, imgB] = await Promise.all([normalizeImage(imageA), normalizeImage(imageB)]);
1156
- const prompt = buildComparePrompt({
1157
- userPrompt: options?.prompt,
1158
- instructions: options?.instructions
1159
- });
1160
- debugLog(resolvedConfig, "compare prompt", prompt);
1161
- const response = await timedSendMessage(driver, [imgA, imgB], prompt);
1162
- debugLog(resolvedConfig, "compare response", response.text);
1163
- const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
1164
- const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
1165
- let diffImage;
1166
- if (effectiveDiffImage) {
1167
- try {
1168
- diffImage = await generateAiDiff(imgA, imgB, resolvedConfig.model, driver);
1169
- } catch (err) {
1170
- const msg = err instanceof Error ? err.message : String(err);
1171
- debugLog(resolvedConfig, "ai diff error", msg);
1172
- if (!resolvedConfig.debug) {
1212
+ return withErrorDebug(resolvedConfig, "compare", async () => {
1213
+ const [imgA, imgB] = await Promise.all([normalizeImage(imageA), normalizeImage(imageB)]);
1214
+ const prompt = buildComparePrompt({
1215
+ userPrompt: options?.prompt,
1216
+ instructions: options?.instructions
1217
+ });
1218
+ debugLog(resolvedConfig, "compare prompt", prompt, "prompt");
1219
+ const response = await timedSendMessage(driver, [imgA, imgB], prompt);
1220
+ debugLog(resolvedConfig, "compare response", response.text, "response");
1221
+ const supportsAnnotatedDiff = resolvedConfig.provider === "google" && resolvedConfig.model === Model.Google.GEMINI_3_FLASH_PREVIEW;
1222
+ const effectiveDiffImage = options?.diffImage ?? (supportsAnnotatedDiff ? true : false);
1223
+ let diffImage;
1224
+ if (effectiveDiffImage) {
1225
+ try {
1226
+ diffImage = await generateAiDiff(imgA, imgB, resolvedConfig.model, driver);
1227
+ } catch (err) {
1228
+ const msg = err instanceof Error ? err.message : String(err);
1173
1229
  process.stderr.write(
1174
1230
  `[visual-ai-assertions] warning: diff generation failed: ${msg}
1175
1231
  `
1176
1232
  );
1177
1233
  }
1178
1234
  }
1179
- }
1180
- const result = parseCompareResponse(response.text);
1181
- return {
1182
- ...result,
1183
- ...diffImage ? { diffImage } : {},
1184
- usage: processUsage("compare", response.usage, response.durationSeconds, resolvedConfig)
1185
- };
1235
+ const result = parseCompareResponse(response.text);
1236
+ return {
1237
+ ...result,
1238
+ ...diffImage ? { diffImage } : {},
1239
+ usage: processUsage("compare", response.usage, response.durationSeconds, resolvedConfig)
1240
+ };
1241
+ });
1186
1242
  },
1187
1243
  elementsVisible(image, elements, options) {
1188
1244
  return checkElementsVisibility(image, elements, true, options);
@@ -1191,57 +1247,65 @@ function visualAI(config = {}) {
1191
1247
  return checkElementsVisibility(image, elements, false, options);
1192
1248
  },
1193
1249
  async accessibility(image, options) {
1194
- const img = await normalizeImage(image);
1195
- const prompt = buildAccessibilityPrompt(options);
1196
- debugLog(resolvedConfig, "accessibility prompt", prompt);
1197
- const response = await timedSendMessage(driver, [img], prompt);
1198
- debugLog(resolvedConfig, "accessibility response", response.text);
1199
- const result = parseCheckResponse(response.text);
1200
- return {
1201
- ...result,
1202
- usage: processUsage(
1203
- "accessibility",
1204
- response.usage,
1205
- response.durationSeconds,
1206
- resolvedConfig
1207
- )
1208
- };
1250
+ return withErrorDebug(resolvedConfig, "accessibility", async () => {
1251
+ const img = await normalizeImage(image);
1252
+ const prompt = buildAccessibilityPrompt(options);
1253
+ debugLog(resolvedConfig, "accessibility prompt", prompt, "prompt");
1254
+ const response = await timedSendMessage(driver, [img], prompt);
1255
+ debugLog(resolvedConfig, "accessibility response", response.text, "response");
1256
+ const result = parseCheckResponse(response.text);
1257
+ return {
1258
+ ...result,
1259
+ usage: processUsage(
1260
+ "accessibility",
1261
+ response.usage,
1262
+ response.durationSeconds,
1263
+ resolvedConfig
1264
+ )
1265
+ };
1266
+ });
1209
1267
  },
1210
1268
  async layout(image, options) {
1211
- const img = await normalizeImage(image);
1212
- const prompt = buildLayoutPrompt(options);
1213
- debugLog(resolvedConfig, "layout prompt", prompt);
1214
- const response = await timedSendMessage(driver, [img], prompt);
1215
- debugLog(resolvedConfig, "layout response", response.text);
1216
- const result = parseCheckResponse(response.text);
1217
- return {
1218
- ...result,
1219
- usage: processUsage("layout", response.usage, response.durationSeconds, resolvedConfig)
1220
- };
1269
+ return withErrorDebug(resolvedConfig, "layout", async () => {
1270
+ const img = await normalizeImage(image);
1271
+ const prompt = buildLayoutPrompt(options);
1272
+ debugLog(resolvedConfig, "layout prompt", prompt, "prompt");
1273
+ const response = await timedSendMessage(driver, [img], prompt);
1274
+ debugLog(resolvedConfig, "layout response", response.text, "response");
1275
+ const result = parseCheckResponse(response.text);
1276
+ return {
1277
+ ...result,
1278
+ usage: processUsage("layout", response.usage, response.durationSeconds, resolvedConfig)
1279
+ };
1280
+ });
1221
1281
  },
1222
1282
  async pageLoad(image, options) {
1223
- const img = await normalizeImage(image);
1224
- const prompt = buildPageLoadPrompt(options);
1225
- debugLog(resolvedConfig, "pageLoad prompt", prompt);
1226
- const response = await timedSendMessage(driver, [img], prompt);
1227
- debugLog(resolvedConfig, "pageLoad response", response.text);
1228
- const result = parseCheckResponse(response.text);
1229
- return {
1230
- ...result,
1231
- usage: processUsage("pageLoad", response.usage, response.durationSeconds, resolvedConfig)
1232
- };
1283
+ return withErrorDebug(resolvedConfig, "pageLoad", async () => {
1284
+ const img = await normalizeImage(image);
1285
+ const prompt = buildPageLoadPrompt(options);
1286
+ debugLog(resolvedConfig, "pageLoad prompt", prompt, "prompt");
1287
+ const response = await timedSendMessage(driver, [img], prompt);
1288
+ debugLog(resolvedConfig, "pageLoad response", response.text, "response");
1289
+ const result = parseCheckResponse(response.text);
1290
+ return {
1291
+ ...result,
1292
+ usage: processUsage("pageLoad", response.usage, response.durationSeconds, resolvedConfig)
1293
+ };
1294
+ });
1233
1295
  },
1234
1296
  async content(image, options) {
1235
- const img = await normalizeImage(image);
1236
- const prompt = buildContentPrompt(options);
1237
- debugLog(resolvedConfig, "content prompt", prompt);
1238
- const response = await timedSendMessage(driver, [img], prompt);
1239
- debugLog(resolvedConfig, "content response", response.text);
1240
- const result = parseCheckResponse(response.text);
1241
- return {
1242
- ...result,
1243
- usage: processUsage("content", response.usage, response.durationSeconds, resolvedConfig)
1244
- };
1297
+ return withErrorDebug(resolvedConfig, "content", async () => {
1298
+ const img = await normalizeImage(image);
1299
+ const prompt = buildContentPrompt(options);
1300
+ debugLog(resolvedConfig, "content prompt", prompt, "prompt");
1301
+ const response = await timedSendMessage(driver, [img], prompt);
1302
+ debugLog(resolvedConfig, "content response", response.text, "response");
1303
+ const result = parseCheckResponse(response.text);
1304
+ return {
1305
+ ...result,
1306
+ usage: processUsage("content", response.usage, response.durationSeconds, resolvedConfig)
1307
+ };
1308
+ });
1245
1309
  }
1246
1310
  };
1247
1311
  }