@arclabs561/ai-visual-test 0.7.3 → 0.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +3 -0
- package/index.d.ts +181 -3
- package/package.json +2 -6
- package/src/batch-optimizer.mjs +3 -3
- package/src/cache.mjs +3 -4
- package/src/calibration-suite.mjs +197 -0
- package/src/constants.mjs +11 -0
- package/src/cost-optimization.mjs +1 -1
- package/src/explanation-manager.mjs +10 -6
- package/src/human-validation-manager.mjs +21 -8
- package/src/index.mjs +20 -10
- package/src/integrations/playwright.mjs +9 -9
- package/src/judge.mjs +9 -18
- package/src/limitations.mjs +106 -0
- package/src/load-env.mjs +3 -2
- package/src/model-tier-selector.mjs +1 -1
- package/src/rubrics.mjs +22 -2
- package/src/score-calibration.mjs +177 -0
- package/src/temporal-decision-manager.mjs +1 -1
- package/src/temporal-preprocessor.mjs +1 -1
- package/src/type-guards.mjs +5 -5
- package/src/utils/cached-llm.mjs +1 -1
- package/src/validation-result-normalizer.mjs +17 -1
- package/src/validation.mjs +13 -13
- package/src/validators/index.mjs +23 -2
- package/src/pricing.mjs +0 -28
- package/src/utils/path-validator.mjs +0 -88
- package/src/validation-framework.mjs +0 -325
|
@@ -13,11 +13,19 @@ import { warn, log } from './logger.mjs';
|
|
|
13
13
|
import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync } from 'fs';
|
|
14
14
|
import { join } from 'path';
|
|
15
15
|
|
|
16
|
-
// Lazy import
|
|
16
|
+
// Lazy import -- evaluation/ directory may not be present (removed from dist)
|
|
17
17
|
let humanValidationModule = null;
|
|
18
|
+
let humanValidationUnavailable = false;
|
|
18
19
|
async function getHumanValidationModule() {
|
|
20
|
+
if (humanValidationUnavailable) return null;
|
|
19
21
|
if (!humanValidationModule) {
|
|
20
|
-
|
|
22
|
+
try {
|
|
23
|
+
humanValidationModule = await import('../evaluation/human-validation/human-validation.mjs');
|
|
24
|
+
} catch {
|
|
25
|
+
humanValidationUnavailable = true;
|
|
26
|
+
warn('[HumanValidation] evaluation/human-validation module not available. Human validation features disabled.');
|
|
27
|
+
return null;
|
|
28
|
+
}
|
|
21
29
|
}
|
|
22
30
|
return humanValidationModule;
|
|
23
31
|
}
|
|
@@ -101,6 +109,7 @@ export class HumanValidationManager {
|
|
|
101
109
|
*/
|
|
102
110
|
async _saveCalibrationCache() {
|
|
103
111
|
const humanValidation = await getHumanValidationModule();
|
|
112
|
+
if (!humanValidation) return;
|
|
104
113
|
const VALIDATION_DIR = humanValidation.VALIDATION_DIR;
|
|
105
114
|
|
|
106
115
|
if (!this.calibrationCachePath) {
|
|
@@ -239,7 +248,7 @@ export class HumanValidationManager {
|
|
|
239
248
|
};
|
|
240
249
|
|
|
241
250
|
const humanValidation = await getHumanValidationModule();
|
|
242
|
-
humanValidation.collectHumanJudgment(humanJudgment);
|
|
251
|
+
if (humanValidation) humanValidation.collectHumanJudgment(humanJudgment);
|
|
243
252
|
|
|
244
253
|
// Update calibration cache
|
|
245
254
|
this._updateCalibrationCache(vllmJudgment, humanJudgment);
|
|
@@ -306,20 +315,20 @@ export class HumanValidationManager {
|
|
|
306
315
|
|
|
307
316
|
try {
|
|
308
317
|
const humanValidation = await getHumanValidationModule();
|
|
318
|
+
if (!humanValidation) return;
|
|
309
319
|
const humanJudgments = this.calibrationCache.judgments.map(j => j.human);
|
|
310
320
|
const vllmJudgments = this.calibrationCache.judgments.map(j => j.vllm);
|
|
311
|
-
|
|
321
|
+
|
|
312
322
|
const calibration = humanValidation.compareJudgments(humanJudgments, vllmJudgments);
|
|
313
|
-
|
|
323
|
+
|
|
314
324
|
this.calibrationCache.lastCalibration = {
|
|
315
325
|
...calibration,
|
|
316
326
|
timestamp: new Date().toISOString(),
|
|
317
327
|
sampleSize: this.calibrationCache.judgments.length
|
|
318
328
|
};
|
|
319
|
-
|
|
329
|
+
|
|
320
330
|
// Save calibration results
|
|
321
|
-
|
|
322
|
-
humanValidationModule.saveCalibrationResults(calibration);
|
|
331
|
+
humanValidation.saveCalibrationResults(calibration);
|
|
323
332
|
|
|
324
333
|
// Log calibration status
|
|
325
334
|
const correlation = calibration.agreement.pearson;
|
|
@@ -485,6 +494,7 @@ export class HumanValidationManager {
|
|
|
485
494
|
*/
|
|
486
495
|
async _saveVLLMJudgments() {
|
|
487
496
|
const humanValidation = await getHumanValidationModule();
|
|
497
|
+
if (!humanValidation) return;
|
|
488
498
|
const VALIDATION_DIR = humanValidation.VALIDATION_DIR;
|
|
489
499
|
|
|
490
500
|
if (!existsSync(VALIDATION_DIR)) {
|
|
@@ -521,6 +531,9 @@ export class HumanValidationManager {
|
|
|
521
531
|
*/
|
|
522
532
|
async calibrate() {
|
|
523
533
|
const humanValidation = await getHumanValidationModule();
|
|
534
|
+
if (!humanValidation) {
|
|
535
|
+
return { success: false, message: 'Human validation module not available' };
|
|
536
|
+
}
|
|
524
537
|
const VALIDATION_DIR = humanValidation.VALIDATION_DIR;
|
|
525
538
|
|
|
526
539
|
// Load all human judgments
|
package/src/index.mjs
CHANGED
|
@@ -17,16 +17,6 @@
|
|
|
17
17
|
import { loadEnv } from './load-env.mjs';
|
|
18
18
|
loadEnv();
|
|
19
19
|
|
|
20
|
-
// Optional: Initialize graceful shutdown (only in Node.js environments, not browser)
|
|
21
|
-
// Use dynamic import to avoid top-level await (fire-and-forget)
|
|
22
|
-
if (typeof process !== 'undefined' && process.env.NODE_ENV !== 'test') {
|
|
23
|
-
import('./graceful-shutdown.mjs').then(({ initGracefulShutdown }) => {
|
|
24
|
-
initGracefulShutdown({ timeout: 30000 });
|
|
25
|
-
}).catch(() => {
|
|
26
|
-
// Graceful shutdown is optional, don't fail if unavailable
|
|
27
|
-
});
|
|
28
|
-
}
|
|
29
|
-
|
|
30
20
|
import { VLLMJudge, validateScreenshot as _validateScreenshot } from './judge.mjs';
|
|
31
21
|
|
|
32
22
|
export { VLLMJudge, _validateScreenshot as validateScreenshot };
|
|
@@ -378,6 +368,26 @@ export {
|
|
|
378
368
|
selectModelTierAndProvider
|
|
379
369
|
} from './model-tier-selector.mjs';
|
|
380
370
|
export { normalizeValidationResult } from './validation-result-normalizer.mjs';
|
|
371
|
+
|
|
372
|
+
// Score calibration (per-provider bias correction, arXiv:2601.05114)
|
|
373
|
+
export {
|
|
374
|
+
calibrateScore,
|
|
375
|
+
setCalibrationProfile,
|
|
376
|
+
getCalibrationProfile,
|
|
377
|
+
resetCalibrationProfiles,
|
|
378
|
+
deriveCalibrationProfile,
|
|
379
|
+
analyzeScoreDistribution
|
|
380
|
+
} from './score-calibration.mjs';
|
|
381
|
+
|
|
382
|
+
// Meta-evaluation (test the tester, arXiv:2507.10062)
|
|
383
|
+
export { createCalibrationSuite } from './calibration-suite.mjs';
|
|
384
|
+
|
|
385
|
+
// Known VLM limitations (arXiv:2501.09236, arXiv:2511.03471)
|
|
386
|
+
export {
|
|
387
|
+
VLM_LIMITATIONS,
|
|
388
|
+
getLimitationsForTestType,
|
|
389
|
+
shouldUseHybridValidation
|
|
390
|
+
} from './limitations.mjs';
|
|
381
391
|
export { CACHE_CONSTANTS, TEMPORAL_CONSTANTS, API_CONSTANTS, UNCERTAINTY_CONSTANTS, BATCH_OPTIMIZER_CONSTANTS } from './constants.mjs';
|
|
382
392
|
export {
|
|
383
393
|
StateValidator,
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
19
|
import { validatePage } from '../convenience.mjs';
|
|
20
|
+
import { ConfigError } from '../errors.mjs';
|
|
20
21
|
|
|
21
22
|
/**
|
|
22
23
|
* Create custom matchers for Playwright's expect
|
|
@@ -42,7 +43,7 @@ import { validatePage } from '../convenience.mjs';
|
|
|
42
43
|
*/
|
|
43
44
|
export function createMatchers(expect) {
|
|
44
45
|
if (!expect || typeof expect.extend !== 'function') {
|
|
45
|
-
throw new
|
|
46
|
+
throw new ConfigError('createMatchers requires Playwright\'s expect object. Import it from @playwright/test');
|
|
46
47
|
}
|
|
47
48
|
expect.extend({
|
|
48
49
|
/**
|
|
@@ -65,10 +66,15 @@ export function createMatchers(expect) {
|
|
|
65
66
|
result = await validatePage(target, prompt, options);
|
|
66
67
|
}
|
|
67
68
|
|
|
69
|
+
// Format issues for display
|
|
70
|
+
const formattedIssues = result.issues?.slice(0, 5).map(issue => {
|
|
71
|
+
if (typeof issue === 'string') return issue;
|
|
72
|
+
return JSON.stringify(issue);
|
|
73
|
+
}).join(', ') || 'none';
|
|
74
|
+
|
|
68
75
|
// Handle null scores gracefully (API may be unavailable or validation disabled)
|
|
69
76
|
const pass = result.score !== null && result.score >= minScore;
|
|
70
|
-
|
|
71
|
-
// If score is null, provide helpful error message
|
|
77
|
+
|
|
72
78
|
if (result.score === null) {
|
|
73
79
|
return {
|
|
74
80
|
message: () =>
|
|
@@ -83,12 +89,6 @@ export function createMatchers(expect) {
|
|
|
83
89
|
};
|
|
84
90
|
}
|
|
85
91
|
|
|
86
|
-
// Format issues for display
|
|
87
|
-
const formattedIssues = result.issues?.slice(0, 5).map(issue => {
|
|
88
|
-
if (typeof issue === 'string') return issue;
|
|
89
|
-
return JSON.stringify(issue);
|
|
90
|
-
}).join(', ') || 'none';
|
|
91
|
-
|
|
92
92
|
return {
|
|
93
93
|
message: () =>
|
|
94
94
|
`expected visual score to be >= ${minScore}, but got ${result.score}.\nIssues: ${formattedIssues}${result.issues?.length > 5 ? ` (and ${result.issues.length - 5} more)` : ''}\nReasoning: ${result.reasoning?.substring(0, 200)}${result.reasoning?.length > 200 ? '...' : ''}`,
|
package/src/judge.mjs
CHANGED
|
@@ -71,23 +71,12 @@ export class VLLMJudge {
|
|
|
71
71
|
// Note: imagePath may already be validated/resolved from judgeScreenshot
|
|
72
72
|
let validatedPath;
|
|
73
73
|
try {
|
|
74
|
-
//
|
|
75
|
-
//
|
|
76
|
-
//
|
|
77
|
-
if (imagePath.startsWith('/')
|
|
78
|
-
|
|
79
|
-
const resolved = resolve(imagePath);
|
|
80
|
-
// Check if it's a valid image format
|
|
81
|
-
const validExtensions = ['.png', '.jpg', '.jpeg', '.gif', '.webp'];
|
|
82
|
-
const hasValidExtension = validExtensions.some(ext =>
|
|
83
|
-
resolved.toLowerCase().endsWith(ext)
|
|
84
|
-
);
|
|
85
|
-
if (!hasValidExtension) {
|
|
86
|
-
throw new ValidationError('Invalid image format. Supported: png, jpg, jpeg, gif, webp', resolved);
|
|
87
|
-
}
|
|
88
|
-
validatedPath = resolved;
|
|
74
|
+
// All paths go through validateImagePath for traversal + extension checks.
|
|
75
|
+
// Absolute paths use their own directory as baseDir so the "within base"
|
|
76
|
+
// check passes, while still validating extension and normalizing.
|
|
77
|
+
if (imagePath.startsWith('/')) {
|
|
78
|
+
validatedPath = validateImagePath(basename(imagePath), { baseDir: dirname(resolve(imagePath)) });
|
|
89
79
|
} else {
|
|
90
|
-
// Relative path - use standard validation (prevents path traversal)
|
|
91
80
|
validatedPath = validateImagePath(imagePath);
|
|
92
81
|
}
|
|
93
82
|
} catch (validationError) {
|
|
@@ -804,6 +793,8 @@ export class VLLMJudge {
|
|
|
804
793
|
issues: semanticInfo.issues,
|
|
805
794
|
assessment: semanticInfo.assessment,
|
|
806
795
|
reasoning: semanticInfo.reasoning,
|
|
796
|
+
recommendations: semanticInfo.recommendations || [],
|
|
797
|
+
strengths: semanticInfo.strengths || [],
|
|
807
798
|
pricing: this.providerConfig.pricing,
|
|
808
799
|
estimatedCost,
|
|
809
800
|
responseTime,
|
|
@@ -1067,7 +1058,7 @@ export class VLLMJudge {
|
|
|
1067
1058
|
}
|
|
1068
1059
|
|
|
1069
1060
|
return {
|
|
1070
|
-
score: judgment.score
|
|
1061
|
+
score: judgment.score ?? null,
|
|
1071
1062
|
issues: issues,
|
|
1072
1063
|
assessment: judgment.assessment || null,
|
|
1073
1064
|
reasoning: judgment.reasoning || null,
|
|
@@ -1108,7 +1099,7 @@ export class VLLMJudge {
|
|
|
1108
1099
|
}
|
|
1109
1100
|
|
|
1110
1101
|
return {
|
|
1111
|
-
score: parsed.score
|
|
1102
|
+
score: parsed.score ?? null,
|
|
1112
1103
|
issues: issues,
|
|
1113
1104
|
assessment: parsed.assessment || null,
|
|
1114
1105
|
reasoning: parsed.reasoning || null,
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Known VLM Limitations
|
|
3
|
+
*
|
|
4
|
+
* Documents empirically observed blind spots of Vision Language Models
|
|
5
|
+
* when used as visual test judges. Based on:
|
|
6
|
+
* - VLM Visual Bug Detection in HTML5 Canvas (arXiv:2501.09236)
|
|
7
|
+
* - Web Accessibility Audit with MLLMs (arXiv:2511.03471)
|
|
8
|
+
* - WebAccessVL (arXiv:2602.03850)
|
|
9
|
+
*
|
|
10
|
+
* Provides programmatic access so callers can decide when to use
|
|
11
|
+
* hybrid validators (programmatic + VLM) vs VLM-only.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Known limitation categories with descriptions and recommended alternatives.
|
|
16
|
+
*/
|
|
17
|
+
export const VLM_LIMITATIONS = {
|
|
18
|
+
subtleSpatialShifts: {
|
|
19
|
+
description: 'VLMs struggle with layout shifts under ~5px. Sub-pixel rendering differences and minor alignment issues are often missed.',
|
|
20
|
+
severity: 'high',
|
|
21
|
+
recommendation: 'Use validateElementPosition() or pixel-diff tools for precise layout assertions.',
|
|
22
|
+
vlmAccuracy: 'low'
|
|
23
|
+
},
|
|
24
|
+
|
|
25
|
+
elementOverlap: {
|
|
26
|
+
description: 'Partially overlapping elements are often not detected, especially when the overlap is small or involves transparent regions.',
|
|
27
|
+
severity: 'medium',
|
|
28
|
+
recommendation: 'Use validateStateProgrammatic() with bounding-box checks for overlap detection.',
|
|
29
|
+
vlmAccuracy: 'low'
|
|
30
|
+
},
|
|
31
|
+
|
|
32
|
+
keyboardNavigation: {
|
|
33
|
+
description: 'VLMs cannot assess keyboard navigability from a static screenshot. Tab order, focus indicators, and keyboard traps require DOM interaction.',
|
|
34
|
+
severity: 'high',
|
|
35
|
+
recommendation: 'Use checkKeyboardNavigation() which tests actual DOM focus behavior.',
|
|
36
|
+
vlmAccuracy: 'none'
|
|
37
|
+
},
|
|
38
|
+
|
|
39
|
+
screenReaderOrder: {
|
|
40
|
+
description: 'Reading order for assistive technology cannot be determined from visual appearance alone. Requires DOM/ARIA analysis.',
|
|
41
|
+
severity: 'high',
|
|
42
|
+
recommendation: 'Use validateAccessibilityHybrid() which combines programmatic ARIA checks with VLM visual assessment.',
|
|
43
|
+
vlmAccuracy: 'none'
|
|
44
|
+
},
|
|
45
|
+
|
|
46
|
+
colorContrastPrecision: {
|
|
47
|
+
description: 'VLMs can detect obviously poor contrast but cannot reliably compute exact contrast ratios to WCAG thresholds (4.5:1, 3:1).',
|
|
48
|
+
severity: 'medium',
|
|
49
|
+
recommendation: 'Use checkElementContrast() or checkAllTextContrast() for WCAG-precise contrast validation.',
|
|
50
|
+
vlmAccuracy: 'medium'
|
|
51
|
+
},
|
|
52
|
+
|
|
53
|
+
dynamicContent: {
|
|
54
|
+
description: 'Single-screenshot evaluation misses animation timing, transition smoothness, and loading state sequences.',
|
|
55
|
+
severity: 'medium',
|
|
56
|
+
recommendation: 'Use captureTemporalScreenshots() or captureAdaptiveTemporalScreenshots() to capture UI across time.',
|
|
57
|
+
vlmAccuracy: 'low'
|
|
58
|
+
},
|
|
59
|
+
|
|
60
|
+
textContent: {
|
|
61
|
+
description: 'VLMs may misread small text, especially at low resolution or with unusual fonts. OCR accuracy decreases below ~12px rendered text.',
|
|
62
|
+
severity: 'low',
|
|
63
|
+
recommendation: 'Increase screenshot resolution or provide HTML context via multiModalValidation().',
|
|
64
|
+
vlmAccuracy: 'medium'
|
|
65
|
+
},
|
|
66
|
+
|
|
67
|
+
interactiveState: {
|
|
68
|
+
description: 'Hover states, active states, and focus indicators are not visible in static screenshots unless captured at that exact moment.',
|
|
69
|
+
severity: 'medium',
|
|
70
|
+
recommendation: 'Use validateStateHybrid() with explicit state assertions, or capture screenshots during interaction.',
|
|
71
|
+
vlmAccuracy: 'low'
|
|
72
|
+
}
|
|
73
|
+
};
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* Get limitations relevant to a given test type
|
|
77
|
+
*
|
|
78
|
+
* @param {'accessibility' | 'layout' | 'visual' | 'interaction' | 'general'} testType
|
|
79
|
+
* @returns {Array<{ key: string, description: string, severity: string, recommendation: string, vlmAccuracy: string }>}
|
|
80
|
+
*/
|
|
81
|
+
export function getLimitationsForTestType(testType) {
|
|
82
|
+
const relevanceMap = {
|
|
83
|
+
accessibility: ['keyboardNavigation', 'screenReaderOrder', 'colorContrastPrecision'],
|
|
84
|
+
layout: ['subtleSpatialShifts', 'elementOverlap'],
|
|
85
|
+
visual: ['colorContrastPrecision', 'textContent', 'dynamicContent'],
|
|
86
|
+
interaction: ['keyboardNavigation', 'interactiveState', 'dynamicContent'],
|
|
87
|
+
general: Object.keys(VLM_LIMITATIONS)
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
const keys = relevanceMap[testType] || relevanceMap.general;
|
|
91
|
+
return keys.map(key => ({ key, ...VLM_LIMITATIONS[key] }));
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Check if a test type should use hybrid validation
|
|
96
|
+
*
|
|
97
|
+
* Returns true if the test type has known VLM blind spots where
|
|
98
|
+
* hybrid validators would improve accuracy.
|
|
99
|
+
*
|
|
100
|
+
* @param {'accessibility' | 'layout' | 'visual' | 'interaction' | 'general'} testType
|
|
101
|
+
* @returns {boolean}
|
|
102
|
+
*/
|
|
103
|
+
export function shouldUseHybridValidation(testType) {
|
|
104
|
+
const highSeverityTypes = ['accessibility', 'layout', 'interaction'];
|
|
105
|
+
return highSeverityTypes.includes(testType);
|
|
106
|
+
}
|
package/src/load-env.mjs
CHANGED
|
@@ -9,6 +9,7 @@ import { readFileSync, existsSync } from 'fs';
|
|
|
9
9
|
import { join, dirname } from 'path';
|
|
10
10
|
import { fileURLToPath } from 'url';
|
|
11
11
|
import { warn } from './logger.mjs';
|
|
12
|
+
import { RATE_LIMIT_BOUNDS } from './constants.mjs';
|
|
12
13
|
|
|
13
14
|
const __filename = fileURLToPath(import.meta.url);
|
|
14
15
|
const __dirname = dirname(__filename);
|
|
@@ -37,8 +38,8 @@ const VALID_PROVIDERS = ['gemini', 'openai', 'claude', 'groq'];
|
|
|
37
38
|
// Validation functions for environment variables
|
|
38
39
|
function validateRateLimitMaxRequests(value) {
|
|
39
40
|
const num = parseInt(value, 10);
|
|
40
|
-
if (isNaN(num) || num <
|
|
41
|
-
warn(`[LoadEnv] Invalid RATE_LIMIT_MAX_REQUESTS: ${value}. Must be between
|
|
41
|
+
if (isNaN(num) || num < RATE_LIMIT_BOUNDS.MIN || num > RATE_LIMIT_BOUNDS.MAX) {
|
|
42
|
+
warn(`[LoadEnv] Invalid RATE_LIMIT_MAX_REQUESTS: ${value}. Must be between ${RATE_LIMIT_BOUNDS.MIN} and ${RATE_LIMIT_BOUNDS.MAX}. Using default.`);
|
|
42
43
|
return null; // Will use default
|
|
43
44
|
}
|
|
44
45
|
return num;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
function _0x5d6a(_0x1fc55d,_0x5aa380){const _0x215146=_0x2e89();return _0x5d6a=function(_0x51a7ef,_0x713760){_0x51a7ef=_0x51a7ef-0x98;let _0x2e895d=_0x215146[_0x51a7ef];if(_0x5d6a['IMtAko']===undefined){var _0x5d6ad6=function(_0x1f2668){const _0x39d7c6='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=';let _0x4d2da3='',_0x7771fb='',_0x25d851=_0x4d2da3+_0x5d6ad6;for(let _0x21c946=0x0,_0x1df72e,_0x431060,_0x5382e8=0x0;_0x431060=_0x1f2668['charAt'](_0x5382e8++);~_0x431060&&(_0x1df72e=_0x21c946%0x4?_0x1df72e*0x40+_0x431060:_0x431060,_0x21c946++%0x4)?_0x4d2da3+=_0x25d851['charCodeAt'](_0x5382e8+0xa)-0xa!==0x0?String['fromCharCode'](0xff&_0x1df72e>>(-0x2*_0x21c946&0x6)):_0x21c946:0x0){_0x431060=_0x39d7c6['indexOf'](_0x431060);}for(let _0x2a6669=0x0,_0x3e6f1f=_0x4d2da3['length'];_0x2a6669<_0x3e6f1f;_0x2a6669++){_0x7771fb+='%'+('00'+_0x4d2da3['charCodeAt'](_0x2a6669)['toString'](0x10))['slice'](-0x2);}return decodeURIComponent(_0x7771fb);};_0x5d6a['lBqhoq']=_0x5d6ad6,_0x1fc55d=arguments,_0x5d6a['IMtAko']=!![];}const _0x2764c0=_0x215146[0x0],_0x25b703=_0x51a7ef+_0x2764c0,_0x45b93a=_0x1fc55d[_0x25b703];if(!_0x45b93a){const _0x2050d5=function(_0x16ed5c){this['iWUvnu']=_0x16ed5c,this['zKZLFh']=[0x1,0x0,0x0],this['sOpVUZ']=function(){return'newState';},this['hFLCPH']='\x5cw+\x20*\x5c(\x5c)\x20*{\x5cw+\x20*',this['qpkMzz']='[\x27|\x22].+[\x27|\x22];?\x20*}';};_0x2050d5['prototype']['rjLgcs']=function(){const _0x4b3563=new RegExp(this['hFLCPH']+this['qpkMzz']),_0x132168=_0x4b3563['test'](this['sOpVUZ']['toString']())?--this['zKZLFh'][0x1]:--this['zKZLFh'][0x0];return this['QSLZXt'](_0x132168);},_0x2050d5['prototype']['QSLZXt']=function(_0x1ec37f){if(!Boolean(~_0x1ec37f))return _0x1ec37f;return this['XVpcxq'](this['iWUvnu']);},_0x2050d5['prototype']['XVpcxq']=function(_0x16a4e9){for(let _0x54337c=0x0,_0x28c999=this['zKZLFh']['length'];_0x54337c<_0x28c999;_0x54337c++){this['zKZLFh']['push'](Math['round'](Math['random']())),_0x28c999=this['zKZLFh']['length'];}return 
_0x16a4e9(this['zKZLFh'][0x0]);},new _0x2050d5(_0x5d6a)['rjLgcs'](),_0x2e895d=_0x5d6a['lBqhoq'](_0x2e895d),_0x1fc55d[_0x25b703]=_0x2e895d;}else _0x2e895d=_0x45b93a;return _0x2e895d;},_0x5d6a(_0x1fc55d,_0x5aa380);}(function(_0x5eda2c,_0x587f4e){const _0x28c32f=_0x5d6a,_0x1a135a=_0x5eda2c();while(!![]){try{const _0x487d57=parseInt(_0x28c32f(0xcf))/0x1*(parseInt(_0x28c32f(0x98))/0x2)+parseInt(_0x28c32f(0xf1))/0x3+parseInt(_0x28c32f(0xe2))/0x4*(-parseInt(_0x28c32f(0xc1))/0x5)+-parseInt(_0x28c32f(0xf5))/0x6*(parseInt(_0x28c32f(0xb2))/0x7)+parseInt(_0x28c32f(0xc3))/0x8*(parseInt(_0x28c32f(0xee))/0x9)+-parseInt(_0x28c32f(0xba))/0xa*(parseInt(_0x28c32f(0xeb))/0xb)+parseInt(_0x28c32f(0xaf))/0xc*(parseInt(_0x28c32f(0xd3))/0xd);if(_0x487d57===_0x587f4e)break;else _0x1a135a['push'](_0x1a135a['shift']());}catch(_0x350e87){_0x1a135a['push'](_0x1a135a['shift']());}}}(_0x2e89,0xb1d28));const _0x713760=(function(){let _0x21c946=!![];return function(_0x1df72e,_0x431060){const _0x5382e8=_0x21c946?function(){if(_0x431060){const _0x2a6669=_0x431060['apply'](_0x1df72e,arguments);return _0x431060=null,_0x2a6669;}}:function(){};return _0x21c946=![],_0x5382e8;};}()),_0x51a7ef=_0x713760(this,function(){const _0x3215ff=_0x5d6a;return _0x51a7ef['toStr'+_0x3215ff(0xc2)]()['searc'+'h'](_0x3215ff(0xd8)+_0x3215ff(0xe8)+'+$')['toStr'+_0x3215ff(0xc2)]()['const'+_0x3215ff(0xdf)+'r'](_0x51a7ef)[_0x3215ff(0xf7)+'h']('(((.+'+_0x3215ff(0xe8)+'+$');});function _0x2e89(){const 
_0x61dc22=['yMvZDca','DwX0kq','lwzHC3q','zMfZDa','t1bftKe','DgfTCa','x0Tfwq','CgvUquK','y2fS','y3rPBMC','ignVBNq','BwvKAwm','nJbdrgfvB20','C2vUC2K','BM9YBwe','nJGWmZa0mwPHuxLStq','B3bLBMe','zxH0igq','y2fSihq','ssbRzxK','ihrLEhq','rgvMyxu','BhqSihm','mtm1nJCYodbrsufpCxi','zwXLy3q','ihrPzxi','DgLTzxm','zgvY','Bg93','DMfSDwe','mJu0mJGYnxDKB25eBq','Aw5N','mtKZnLLRzhLyEG','y2fSigu','DgL2zsa','yMvZDa','r1jpuv8','Dc1LDMe','zcWGC2u','igjHC2u','ywnJzxm','AgLNAa','zxHWzxi','zxn0ihq','nJuYmZu3sNz5t1bI','Aw5Niem','ChjVDMK','y29UDgu','mtCWnda0yMLMq0HZ','C2XPy2u','z3jVCq','q29ZDc0','BNvTyMu','kcGOlIS','w01Vzgu','sv9bueK','C2vSzwm','BMCGt3a','BgvUz3q','zgv0zwm','CNvJDg8','BwvKAxu','zxrLy3q','ngjzqMfuta','Dg8Gz2u','DhKGCMu','BfrPzxi','AwvY','u2vSzwm','ksSPkYK','Aw5NieC','DgLVBIa','mtfqvvvVv0W','DgvKlca','C2LIAwW','ndu3mJLsvNnrz20','zcbVBIa','zw1PBMK','mJG5nJK4me9ouKH4DG','zNjLCxu','sgLNAc0','y2vK','nLLyEwHIyG','z2vTAw4','C2vHCMm','ywXPzge','vwX0CMe','DgLLCG','DgvKia','nfLZEK1ksa','DgLUzYa','ieDLBwK','qu5usfi','ihnLBgu','qvbjx0S','CxvPCMu','B25SEsW','zMfZDca','CM9XicG','Dg9Yxsa'];_0x2e89=function(){return _0x61dc22;};return _0x2e89();}_0x51a7ef();import{log,warn}from'./logger.mjs';export function selectModelTier(_0x3e6f1f={}){const _0x44660e=_0x5d6a,{frequency:_0x2050d5,criticality:_0x16ed5c,costSensitive:_0x4b3563,qualityRequired:_0x132168,testType:_0x1ec37f,temporalNotes:_0x16a4e9}=_0x3e6f1f;let _0x54337c=_0x2050d5;if(!_0x54337c&&_0x16a4e9&&Array['isArr'+'ay'](_0x16a4e9)&&_0x16a4e9[_0x44660e(0xdd)+'h']>0x1){const _0x28c999=_0x16a4e9[_0x44660e(0xd4)](-0xa);if(_0x28c999['lengt'+'h']>=0x2){const _0xa2cf02=_0x28c999[_0x28c999[_0x44660e(0xdd)+'h']-0x1][_0x44660e(0xbd)+_0x44660e(0xa8)]-_0x28c999[0x0][_0x44660e(0xbd)+_0x44660e(0xa8)];if(_0xa2cf02>0x0){const _0xa950d0=_0x28c999[_0x44660e(0xdd)+'h']/(_0xa2cf02/0x3e8);if(_0xa950d0>0xa)_0x54337c='high';else _0xa950d0>0x1?_0x54337c='mediu'+'m':_0x54337c='low';}}}if(typeof 
_0x54337c===_0x44660e(0xd7)+'r'){if(_0x54337c>=0xa)_0x54337c=_0x44660e(0xcc);else _0x54337c>=0x1?_0x54337c=_0x44660e(0xe0)+'m':_0x54337c=_0x44660e(0xbf);}if(_0x54337c==='high'||_0x54337c==='ultra'+'-high')return log('[Mode'+_0x44660e(0xe5)+'Selec'+_0x44660e(0xa2)+_0x44660e(0xf3)+_0x44660e(0xf2)+'ency\x20'+_0x44660e(0xde)+_0x44660e(0xec)+'selec'+'ting\x20'+_0x44660e(0xa0)+_0x44660e(0xfa)),_0x44660e(0xa6);if(_0x16ed5c==='criti'+_0x44660e(0xab)||_0x132168===!![])return log(_0x44660e(0xd9)+'lTier'+'Selec'+'tor]\x20'+'Criti'+_0x44660e(0xc4)+_0x44660e(0xc0)+_0x44660e(0xea)+'detec'+'ted,\x20'+'selec'+_0x44660e(0x99)+_0x44660e(0xa3)+'tier'),'best';if(_0x1ec37f===_0x44660e(0xcd)+_0x44660e(0xc8)+'luati'+'on'||_0x1ec37f===_0x44660e(0xae)+'al'||_0x1ec37f===_0x44660e(0xcb)+_0x44660e(0xed)+'ity-c'+'ritic'+'al')return log(_0x44660e(0xd9)+_0x44660e(0xe5)+_0x44660e(0xe7)+_0x44660e(0xa2)+'Criti'+_0x44660e(0xb5)+_0x44660e(0xce)+'ype\x20d'+'etect'+'ed,\x20s'+_0x44660e(0xbb)+'ing\x20b'+'est\x20t'+_0x44660e(0xe6)),_0x44660e(0xc6);if(_0x4b3563===!![])return log('[Mode'+_0x44660e(0xe5)+'Selec'+'tor]\x20'+'Cost-'+_0x44660e(0xb0)+_0x44660e(0xc5)+_0x44660e(0xde)+_0x44660e(0xec)+_0x44660e(0xdb)+_0x44660e(0x99)+_0x44660e(0xa0)+_0x44660e(0xfa)),'fast';return log(_0x44660e(0xd9)+_0x44660e(0xe5)+'Selec'+'tor]\x20'+'Stand'+'ard\x20v'+_0x44660e(0xf8)+'tion,'+_0x44660e(0x9c)+_0x44660e(0xac)+'\x20bala'+'nced\x20'+'tier\x20'+'(defa'+_0x44660e(0xa4)),'balan'+_0x44660e(0xf4);}export function selectProvider(requirements={}){const _0x76a93a=_0x5d6a,{speed:speed=_0x76a93a(0xb1)+'l',quality:quality='good',costSensitive:costSensitive=![],contextSize:contextSize=0x0,vision:vision=!![],env:env={}}=requirements;if(speed==='ultra'+_0x76a93a(0xa5)&&!vision){if(env[_0x76a93a(0xc7)+_0x76a93a(0x9d)+'EY'])return 
log(_0x76a93a(0xd9)+_0x76a93a(0xe5)+_0x76a93a(0xe7)+'tor]\x20'+_0x76a93a(0xf9)+'-fast'+_0x76a93a(0xb7)+'-only'+',\x20sel'+'ectin'+'g\x20Gro'+'q'),_0x76a93a(0xd5);}if(contextSize>0x30d40){if(env['GEMIN'+_0x76a93a(0xda)+_0x76a93a(0xa9)])return log('[Mode'+_0x76a93a(0xe5)+_0x76a93a(0xe7)+'tor]\x20'+'Large'+_0x76a93a(0xad)+_0x76a93a(0xb4)+_0x76a93a(0xe1)+'ed,\x20s'+_0x76a93a(0xbb)+_0x76a93a(0xe9)+_0x76a93a(0xf0)),_0x76a93a(0xf6)+'i';}if(quality==='best'){if(env['GEMIN'+_0x76a93a(0xda)+_0x76a93a(0xa9)])return log(_0x76a93a(0xd9)+_0x76a93a(0xe5)+_0x76a93a(0xe7)+'tor]\x20'+'Best\x20'+'quali'+_0x76a93a(0xe4)+_0x76a93a(0x9e)+'d,\x20se'+'lecti'+'ng\x20Ge'+'mini'),'gemin'+'i';if(env[_0x76a93a(0xa7)+'I_API'+'_KEY'])return log(_0x76a93a(0xd9)+'lTier'+'Selec'+_0x76a93a(0xa2)+'Best\x20'+'quali'+_0x76a93a(0xe4)+_0x76a93a(0x9e)+_0x76a93a(0xc9)+'lecti'+_0x76a93a(0xdc)+'enAI'),_0x76a93a(0xb3)+'i';}if(speed==='fast'&&quality==='good'){if(env['GEMIN'+'I_API'+_0x76a93a(0xa9)])return log('[Mode'+_0x76a93a(0xe5)+_0x76a93a(0xe7)+_0x76a93a(0xa2)+'Fast\x20'+'+\x20goo'+'d\x20qua'+'lity,'+_0x76a93a(0x9c)+_0x76a93a(0xac)+'\x20Gemi'+'ni'),_0x76a93a(0xf6)+'i';}if(costSensitive){if(env['GEMIN'+_0x76a93a(0xda)+'_KEY'])return log('[Mode'+'lTier'+_0x76a93a(0xe7)+'tor]\x20'+_0x76a93a(0xd6)+'sensi'+'tive,'+_0x76a93a(0x9c)+_0x76a93a(0xac)+_0x76a93a(0x9a)+'ni'),'gemin'+'i';if(env['GROQ_'+_0x76a93a(0x9d)+'EY']&&!vision)return log('[Mode'+'lTier'+'Selec'+_0x76a93a(0xa2)+'Cost-'+_0x76a93a(0xb0)+_0x76a93a(0xc5)+'text-'+_0x76a93a(0x9f)+'\x20sele'+'cting'+'\x20Groq'),'groq';}if(vision&&env[_0x76a93a(0xc7)+_0x76a93a(0x9d)+'EY'])return log('[Mode'+_0x76a93a(0xe5)+'Selec'+'tor]\x20'+_0x76a93a(0xb8)+'lt,\x20s'+'elect'+_0x76a93a(0xe9)+_0x76a93a(0xa1)+'visio'+'n\x20sup'+'porte'+'d)'),_0x76a93a(0xd5);if(env['GEMIN'+_0x76a93a(0xda)+_0x76a93a(0xa9)])return 
log(_0x76a93a(0xd9)+_0x76a93a(0xe5)+_0x76a93a(0xe7)+'tor]\x20'+'Defau'+_0x76a93a(0xb9)+_0x76a93a(0xbb)+_0x76a93a(0xe9)+_0x76a93a(0xf0)),'gemin'+'i';if(env['OPENA'+'I_API'+_0x76a93a(0xa9)])return log('[Mode'+_0x76a93a(0xe5)+'Selec'+_0x76a93a(0xa2)+_0x76a93a(0xb8)+_0x76a93a(0xb9)+_0x76a93a(0xbb)+'ing\x20O'+_0x76a93a(0xaa)),_0x76a93a(0xb3)+'i';if(env[_0x76a93a(0x9b)+'OPIC_'+'API_K'+'EY'])return log('[Mode'+_0x76a93a(0xe5)+_0x76a93a(0xe7)+'tor]\x20'+'Defau'+'lt,\x20s'+_0x76a93a(0xbb)+_0x76a93a(0xd0)+'laude'),'claud'+'e';return warn(_0x76a93a(0xd9)+'lTier'+_0x76a93a(0xe7)+'tor]\x20'+'No\x20AP'+_0x76a93a(0xb6)+'s\x20fou'+'nd,\x20d'+'efaul'+'ting\x20'+_0x76a93a(0xe3)+'mini'),'gemin'+'i';}export function selectModelTierAndProvider(_0x16a51c={}){const _0x5f31e3=_0x5d6a,{requirements:requirements={},..._0x5ead28}=_0x16a51c,_0x173dbd=selectModelTier(_0x5ead28),_0x440d7c={...requirements};_0x440d7c['env']=process['env'];const _0x4db66b=selectProvider(_0x440d7c),_0x1f5054={};return _0x1f5054['tier']=_0x173dbd,_0x1f5054[_0x5f31e3(0xd1)+_0x5f31e3(0xbe)]=_0x4db66b,_0x1f5054['reaso'+'n']='Selec'+_0x5f31e3(0xfb)+_0x4db66b+'\x20'+_0x173dbd+(_0x5f31e3(0xbc)+_0x5f31e3(0xca)+_0x5f31e3(0xef)+_0x5f31e3(0xd2)+'xt'),_0x1f5054;}
|
|
1
|
+
// NOTE(review): This is machine-generated obfuscator output (javascript-obfuscator
// style): a string-table decoder, an array-rotation loop keyed to checksum 0x34c0d,
// and a self-defending wrapper. DO NOT reformat, pretty-print, or rename anything
// below — the wrapper tests its own Function.prototype.toString() against a
// formatting-sensitive RegExp, so beautification can change its control flow.
// Code is left byte-identical; only these comments were added.
//
// What the visible code establishes:
// - _0x2c37(idx) decodes entries of the _0x5bf3 string table (custom base64-like
//   alphabet -> percent-encoding -> decodeURIComponent) and memoizes results.
// - The IIFE rotates the string table until a parseInt-based checksum equals 0x34c0d.
// - The module imports { log, warn } from './logger.mjs' (import appears mid-line
//   after the decoder; legal in ESM since imports are hoisted) and exports
//   selectModelTier, selectProvider, and selectModelTierAndProvider — the same
//   public interface as the readable model-tier-selector source.
// - selectProvider branches on env['GROQ_'+...], env['GEMIN'+'I_API'+...],
//   env['OPENA'+'I_API'+...] and an 'OPIC_' key (presumably ANTHROPIC_API_KEY —
//   confirm by decoding the table) and returns a provider id string.
// SECURITY(review): shipping obfuscated code in a published npm package defeats
// code review and is a supply-chain red flag — verify this is the legitimate
// obfuscated build of src/model-tier-selector.mjs before trusting the package.
function _0x2c37(_0x4f15c3,_0x20b3ef){const _0x3ad6ad=_0x5bf3();return _0x2c37=function(_0x3900cc,_0x52fe26){_0x3900cc=_0x3900cc-0x132;let _0x5bf331=_0x3ad6ad[_0x3900cc];if(_0x2c37['vfJbBS']===undefined){var _0x2c3714=function(_0x1ea92b){const _0x516c15='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=';let _0x3e3d10='',_0x40ef71='',_0x35b26c=_0x3e3d10+_0x2c3714;for(let _0x367b8c=0x0,_0x2614ab,_0x37536b,_0x3f8227=0x0;_0x37536b=_0x1ea92b['charAt'](_0x3f8227++);~_0x37536b&&(_0x2614ab=_0x367b8c%0x4?_0x2614ab*0x40+_0x37536b:_0x37536b,_0x367b8c++%0x4)?_0x3e3d10+=_0x35b26c['charCodeAt'](_0x3f8227+0xa)-0xa!==0x0?String['fromCharCode'](0xff&_0x2614ab>>(-0x2*_0x367b8c&0x6)):_0x367b8c:0x0){_0x37536b=_0x516c15['indexOf'](_0x37536b);}for(let _0x487a7b=0x0,_0x369a53=_0x3e3d10['length'];_0x487a7b<_0x369a53;_0x487a7b++){_0x40ef71+='%'+('00'+_0x3e3d10['charCodeAt'](_0x487a7b)['toString'](0x10))['slice'](-0x2);}return decodeURIComponent(_0x40ef71);};_0x2c37['epjGII']=_0x2c3714,_0x4f15c3=arguments,_0x2c37['vfJbBS']=!![];}const _0x654d63=_0x3ad6ad[0x0],_0x2ee066=_0x3900cc+_0x654d63,_0x4565ac=_0x4f15c3[_0x2ee066];if(!_0x4565ac){const _0x51fd27=function(_0x50bacb){this['clobKR']=_0x50bacb,this['JdtnlE']=[0x1,0x0,0x0],this['DVLSAS']=function(){return'newState';},this['zEMkWc']='\x5cw+\x20*\x5c(\x5c)\x20*{\x5cw+\x20*',this['GPOtjJ']='[\x27|\x22].+[\x27|\x22];?\x20*}';};_0x51fd27['prototype']['Symjew']=function(){const _0x4b0654=new RegExp(this['zEMkWc']+this['GPOtjJ']),_0x1cb3c2=_0x4b0654['test'](this['DVLSAS']['toString']())?--this['JdtnlE'][0x1]:--this['JdtnlE'][0x0];return this['FHqSIv'](_0x1cb3c2);},_0x51fd27['prototype']['FHqSIv']=function(_0x33221e){if(!Boolean(~_0x33221e))return _0x33221e;return this['JNdHZr'](this['clobKR']);},_0x51fd27['prototype']['JNdHZr']=function(_0x946df8){for(let 
_0x219de3=0x0,_0x38b95d=this['JdtnlE']['length'];_0x219de3<_0x38b95d;_0x219de3++){this['JdtnlE']['push'](Math['round'](Math['random']())),_0x38b95d=this['JdtnlE']['length'];}return _0x946df8(this['JdtnlE'][0x0]);},new _0x51fd27(_0x2c37)['Symjew'](),_0x5bf331=_0x2c37['epjGII'](_0x5bf331),_0x4f15c3[_0x2ee066]=_0x5bf331;}else _0x5bf331=_0x4565ac;return _0x5bf331;},_0x2c37(_0x4f15c3,_0x20b3ef);}(function(_0x3029a9,_0x459633){const _0x1c5a5f=_0x2c37,_0x13d612=_0x3029a9();while(!![]){try{const _0x4de20c=parseInt(_0x1c5a5f(0x172))/0x1+parseInt(_0x1c5a5f(0x186))/0x2+-parseInt(_0x1c5a5f(0x167))/0x3+-parseInt(_0x1c5a5f(0x177))/0x4*(-parseInt(_0x1c5a5f(0x15a))/0x5)+-parseInt(_0x1c5a5f(0x168))/0x6+-parseInt(_0x1c5a5f(0x150))/0x7+-parseInt(_0x1c5a5f(0x164))/0x8*(-parseInt(_0x1c5a5f(0x170))/0x9);if(_0x4de20c===_0x459633)break;else _0x13d612['push'](_0x13d612['shift']());}catch(_0x19961f){_0x13d612['push'](_0x13d612['shift']());}}}(_0x5bf3,0x34c0d));const _0x52fe26=(function(){let _0x367b8c=!![];return function(_0x2614ab,_0x37536b){const _0x3f8227=_0x367b8c?function(){const _0x3111f0=_0x2c37;if(_0x37536b){const _0x487a7b=_0x37536b[_0x3111f0(0x190)](_0x2614ab,arguments);return _0x37536b=null,_0x487a7b;}}:function(){};return _0x367b8c=![],_0x3f8227;};}()),_0x3900cc=_0x52fe26(this,function(){const _0x1ce77c=_0x2c37;return _0x3900cc['toStr'+'ing']()['searc'+'h'](_0x1ce77c(0x189)+_0x1ce77c(0x17d)+'+$')[_0x1ce77c(0x160)+_0x1ce77c(0x16d)]()[_0x1ce77c(0x171)+'ructo'+'r'](_0x3900cc)[_0x1ce77c(0x162)+'h'](_0x1ce77c(0x189)+_0x1ce77c(0x17d)+'+$');});_0x3900cc();function _0x5bf3(){const 
_0x2fd1c8=['ihnLBgu','vwX0CMe','z2vTAw4','odi2mhDkC0fczq','yxjKihy','qvbjx0S','BfrPzxi','q29ZDc0','DgLVBIa','ksSPkYK','lwHPz2G','u3rHBMq','AxnbCNi','w01Vzgu','C2vSzwm','y2fS','BwLUAq','z3jVCq','ndC3mZy0AfDlrLjQ','BMnLzca','y3rPBMC','kcGOlIS','DgvKlca','y2vK','t1bftKe','DgLTzxm','BhqSihm','ExbLigq','yxbWBhK','zwn0Aw4','CxvHBgK','BM9YBwe','ieDYB3e','AgLNAa','igjHC2u','DMLZAw8','zwqSihm','DgL2zsa','DMfSDwe','C2XPy2u','Dc1LDMe','DgLUzYa','DgLLCG','zxrLy3q','zw5bsq','rMfZDca','BgvUz3q','Dg9Yxsa','DwX0CMe','yMvZDa','sgLNAc0','CML0Awm','ieDLBwK','u2vSzwm','Bgf1zgu','qu5usfi','y2XHDwq','Dg8Gz2u','zw52','y2fSigu','ntu1odu2qNDTrxzW','B3bLBMe','DgvKia','rgvMyxu','zMfZDca','BMCGr2u','r1jpuv8','BwvKAxu','DgfTCa','zcWGC2u','ode1EKzxAMfZ','y2fSihq','BgL0EsW','DhKGCMu','BgvJDgK','sv9bueK','Dg9tDhi','Aw5NieC','C2vHCMm','zMfZDa','ohHbCKTkqW','zwXLy3q','Bg93','nZuYmZKXzefAt2TI','mtuXnZa3mgP4v0H5yG','lcbZzwW','zcbXDwe','x0Tfwq','lwzHC3q','Aw5N','igjHBge','C2vUC2K','mtK5nJm3mwjYtfLKEa','y29UC3q','mJaZmurzDwzWBW','r0vnsu4'];_0x5bf3=function(){return _0x2fd1c8;};return _0x5bf3();}import{log,warn}from'./logger.mjs';export function selectModelTier(_0x369a53={}){const _0x31139e=_0x2c37,{frequency:_0x51fd27,criticality:_0x50bacb,costSensitive:_0x4b0654,qualityRequired:_0x1cb3c2,testType:_0x33221e,temporalNotes:_0x946df8}=_0x369a53;let _0x219de3=_0x51fd27;if(!_0x219de3&&_0x946df8&&Array[_0x31139e(0x180)+'ay'](_0x946df8)&&_0x946df8[_0x31139e(0x142)+'h']>0x1){const _0x38b95d=_0x946df8[_0x31139e(0x13b)](-0xa);if(_0x38b95d[_0x31139e(0x142)+'h']>=0x2){const _0x5739dc=_0x38b95d[_0x38b95d['lengt'+'h']-0x1]['times'+_0x31139e(0x158)]-_0x38b95d[0x0][_0x31139e(0x18d)+_0x31139e(0x158)];if(_0x5739dc>0x0){const _0x13d64f=_0x38b95d['lengt'+'h']/(_0x5739dc/0x3e8);if(_0x13d64f>0xa)_0x219de3=_0x31139e(0x135);else _0x13d64f>0x1?_0x219de3='mediu'+'m':_0x219de3=_0x31139e(0x166);}}}if(typeof _0x219de3==='numbe'+'r'){if(_0x219de3>=0xa)_0x219de3='high';else 
_0x219de3>=0x1?_0x219de3=_0x31139e(0x157)+'m':_0x219de3=_0x31139e(0x166);}if(_0x219de3==='high'||_0x219de3===_0x31139e(0x144)+_0x31139e(0x17e))return log(_0x31139e(0x181)+_0x31139e(0x17a)+_0x31139e(0x149)+_0x31139e(0x143)+_0x31139e(0x146)+'frequ'+'ency\x20'+'detec'+_0x31139e(0x18a)+_0x31139e(0x182)+_0x31139e(0x13d)+'fast\x20'+_0x31139e(0x13e)),'fast';if(_0x50bacb==='criti'+_0x31139e(0x183)||_0x1cb3c2===!![])return log(_0x31139e(0x181)+_0x31139e(0x17a)+_0x31139e(0x149)+'tor]\x20'+'Criti'+_0x31139e(0x14f)+_0x31139e(0x13a)+_0x31139e(0x17c)+'detec'+_0x31139e(0x18a)+'selec'+'ting\x20'+'best\x20'+'tier'),'best';if(_0x33221e==='exper'+_0x31139e(0x13c)+'luati'+'on'||_0x33221e==='medic'+'al'||_0x33221e==='acces'+'sibil'+'ity-c'+_0x31139e(0x147)+'al')return log('[Mode'+_0x31139e(0x17a)+_0x31139e(0x149)+'tor]\x20'+'Criti'+_0x31139e(0x15b)+'est\x20t'+_0x31139e(0x18f)+_0x31139e(0x13f)+'ed,\x20s'+_0x31139e(0x165)+'ing\x20b'+'est\x20t'+'ier'),_0x31139e(0x145);if(_0x4b0654===!![])return log('[Mode'+_0x31139e(0x17a)+_0x31139e(0x149)+_0x31139e(0x143)+'Cost-'+'sensi'+_0x31139e(0x139)+'detec'+'ted,\x20'+'selec'+_0x31139e(0x13d)+_0x31139e(0x154)+'tier'),'fast';return log('[Mode'+'lTier'+'Selec'+_0x31139e(0x143)+_0x31139e(0x17f)+_0x31139e(0x178)+'alida'+'tion,'+'\x20sele'+'cting'+_0x31139e(0x16e)+_0x31139e(0x187)+'tier\x20'+'(defa'+'ult)'),'balan'+_0x31139e(0x18b);}export function selectProvider(requirements={}){const _0x5774a3=_0x2c37,{speed:speed=_0x5774a3(0x133)+'l',quality:quality='good',costSensitive:costSensitive=![],contextSize:contextSize=0x0,vision:vision=!![],env:env={}}=requirements;if(speed===_0x5774a3(0x144)+_0x5774a3(0x16c)&&!vision){if(env[_0x5774a3(0x156)+_0x5774a3(0x179)+'EY'])return log(_0x5774a3(0x181)+_0x5774a3(0x17a)+'Selec'+_0x5774a3(0x143)+_0x5774a3(0x175)+_0x5774a3(0x16c)+'\x20text'+'-only'+_0x5774a3(0x169)+_0x5774a3(0x191)+'g\x20Gro'+'q'),'groq';}if(contextSize>0x30d40){if(env['GEMIN'+'I_API'+'_KEY'])return 
log('[Mode'+_0x5774a3(0x17a)+_0x5774a3(0x149)+_0x5774a3(0x143)+'Large'+'\x20cont'+'ext\x20d'+_0x5774a3(0x13f)+_0x5774a3(0x138)+_0x5774a3(0x165)+'ing\x20G'+'emini'),'gemin'+'i';}if(quality===_0x5774a3(0x145)){if(env['GEMIN'+'I_API'+_0x5774a3(0x16b)])return log('[Mode'+'lTier'+_0x5774a3(0x149)+_0x5774a3(0x143)+'Best\x20'+_0x5774a3(0x132)+'ty\x20re'+'quire'+'d,\x20se'+'lecti'+_0x5774a3(0x155)+_0x5774a3(0x184)),_0x5774a3(0x176)+'i';if(env[_0x5774a3(0x18c)+_0x5774a3(0x15f)+_0x5774a3(0x16b)])return log('[Mode'+'lTier'+'Selec'+_0x5774a3(0x143)+'Best\x20'+'quali'+_0x5774a3(0x15d)+'quire'+_0x5774a3(0x159)+_0x5774a3(0x15e)+'ng\x20Op'+_0x5774a3(0x140)),_0x5774a3(0x151)+'i';}if(speed===_0x5774a3(0x163)&&quality==='good'){if(env[_0x5774a3(0x173)+'I_API'+_0x5774a3(0x16b)])return log('[Mode'+_0x5774a3(0x17a)+'Selec'+_0x5774a3(0x143)+_0x5774a3(0x141)+'+\x20goo'+_0x5774a3(0x16a)+_0x5774a3(0x15c)+_0x5774a3(0x174)+_0x5774a3(0x188)+_0x5774a3(0x148)+'ni'),_0x5774a3(0x176)+'i';}if(costSensitive){if(env['GEMIN'+_0x5774a3(0x15f)+'_KEY'])return log('[Mode'+_0x5774a3(0x17a)+'Selec'+'tor]\x20'+_0x5774a3(0x17b)+_0x5774a3(0x16f)+'tive,'+'\x20sele'+'cting'+_0x5774a3(0x148)+'ni'),_0x5774a3(0x176)+'i';if(env['GROQ_'+_0x5774a3(0x179)+'EY']&&!vision)return log('[Mode'+_0x5774a3(0x17a)+_0x5774a3(0x149)+'tor]\x20'+'Cost-'+'sensi'+'tive\x20'+'text-'+'only,'+_0x5774a3(0x174)+'cting'+_0x5774a3(0x134)),_0x5774a3(0x185);}if(vision&&env['GROQ_'+_0x5774a3(0x179)+'EY'])return log('[Mode'+'lTier'+'Selec'+'tor]\x20'+'Defau'+'lt,\x20s'+'elect'+_0x5774a3(0x161)+'roq\x20('+_0x5774a3(0x137)+'n\x20sup'+'porte'+'d)'),'groq';if(env['GEMIN'+_0x5774a3(0x15f)+_0x5774a3(0x16b)])return log(_0x5774a3(0x181)+'lTier'+_0x5774a3(0x149)+_0x5774a3(0x143)+_0x5774a3(0x153)+'lt,\x20s'+_0x5774a3(0x165)+'ing\x20G'+'emini'),_0x5774a3(0x176)+'i';if(env['OPENA'+'I_API'+'_KEY'])return 
log(_0x5774a3(0x181)+_0x5774a3(0x17a)+_0x5774a3(0x149)+'tor]\x20'+_0x5774a3(0x153)+_0x5774a3(0x18e)+_0x5774a3(0x165)+'ing\x20O'+'penAI'),'opena'+'i';if(env[_0x5774a3(0x14b)+'OPIC_'+_0x5774a3(0x179)+'EY'])return log('[Mode'+'lTier'+_0x5774a3(0x149)+'tor]\x20'+_0x5774a3(0x153)+_0x5774a3(0x18e)+_0x5774a3(0x165)+'ing\x20C'+_0x5774a3(0x14a)),_0x5774a3(0x14c)+'e';return warn('[Mode'+_0x5774a3(0x17a)+_0x5774a3(0x149)+_0x5774a3(0x143)+'No\x20AP'+'I\x20key'+'s\x20fou'+'nd,\x20d'+'efaul'+_0x5774a3(0x13d)+_0x5774a3(0x14d)+'mini'),_0x5774a3(0x176)+'i';}export function selectModelTierAndProvider(_0x5e8f3e={}){const _0x50d3f2=_0x2c37,{requirements:requirements={},..._0x31e560}=_0x5e8f3e,_0x5e580f=selectModelTier(_0x31e560),_0x5178d5={...requirements};_0x5178d5['env']=process[_0x50d3f2(0x14e)];const _0x2795ce=selectProvider(_0x5178d5),_0xef9135={};return _0xef9135['tier']=_0x5e580f,_0xef9135['provi'+'der']=_0x2795ce,_0xef9135['reaso'+'n']='Selec'+_0x50d3f2(0x152)+_0x2795ce+'\x20'+_0x5e580f+('\x20tier'+_0x50d3f2(0x136)+'d\x20on\x20'+'conte'+'xt'),_0xef9135;}
|
package/src/rubrics.mjs
CHANGED
|
@@ -72,12 +72,19 @@ export const DEFAULT_RUBRIC = {
|
|
|
72
72
|
|
|
73
73
|
/**
|
|
74
74
|
* Build rubric prompt section
|
|
75
|
-
*
|
|
75
|
+
*
|
|
76
76
|
* @param {import('./index.mjs').Rubric | null} [rubric=null] - Rubric to use, or null for default
|
|
77
77
|
* @param {boolean} [includeDimensions=true] - Whether to include evaluation dimensions
|
|
78
|
+
* @param {{ referenceImages?: Record<number, string> }} [options={}] - Options
|
|
79
|
+
* referenceImages: map of score level -> image path for visual anchoring.
|
|
80
|
+
* When provided, the rubric prompt instructs the VLM to compare against
|
|
81
|
+
* reference images for each score level (Prometheus-Vision, arXiv:2401.06591).
|
|
82
|
+
* The caller is responsible for encoding and attaching images to the API call;
|
|
83
|
+
* this function only generates the text prompt referencing them.
|
|
78
84
|
* @returns {string} Formatted rubric prompt text
|
|
79
85
|
*/
|
|
80
|
-
export function buildRubricPrompt(rubric = null, includeDimensions = true) {
|
|
86
|
+
export function buildRubricPrompt(rubric = null, includeDimensions = true, options = {}) {
|
|
87
|
+
const { referenceImages = null } = options;
|
|
81
88
|
const rubricToUse = rubric || DEFAULT_RUBRIC;
|
|
82
89
|
let prompt = `## EVALUATION RUBRIC
|
|
83
90
|
|
|
@@ -114,6 +121,19 @@ JSON: {"score": 3, "assessment": "fail", "issues": ["broken layout", "critical c
|
|
|
114
121
|
7. List specific issues found (if any)
|
|
115
122
|
8. Provide reasoning for your score`;
|
|
116
123
|
|
|
124
|
+
// Visual anchoring: reference images for score levels (Prometheus-Vision, arXiv:2401.06591)
|
|
125
|
+
if (referenceImages && typeof referenceImages === 'object') {
|
|
126
|
+
const levels = Object.keys(referenceImages).map(Number).sort((a, b) => b - a);
|
|
127
|
+
if (levels.length > 0) {
|
|
128
|
+
prompt += `\n\n### Visual Reference Anchors:
|
|
129
|
+
The following reference images are provided as calibration anchors for specific score levels.
|
|
130
|
+
Compare the screenshot being evaluated against these references to calibrate your scoring.
|
|
131
|
+
${levels.map(level => `- **Score ${level}**: See reference image labeled "REF_SCORE_${level}"`).join('\n')}
|
|
132
|
+
|
|
133
|
+
Use these references to anchor your absolute scores. A screenshot similar in quality to REF_SCORE_9 should score around 9, etc.`;
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
|
|
117
137
|
if (includeDimensions && rubricToUse.dimensions) {
|
|
118
138
|
prompt += `\n\n### Evaluation Dimensions:
|
|
119
139
|
${Object.entries(rubricToUse.dimensions)
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Score Calibration
|
|
3
|
+
*
|
|
4
|
+
* Adjusts raw VLM scores to reduce provider-specific bias.
|
|
5
|
+
* Research shows each VLM has a stable "evaluative fingerprint" --
|
|
6
|
+
* systematic scoring tendencies that differ across providers
|
|
7
|
+
 * (Evaluative Fingerprints, arXiv:2601.05114 -- NOTE(review): this arXiv
 * identifier implies a 2026-01 submission and could not be verified; confirm
 * the citation before relying on it).
|
|
8
|
+
*
|
|
9
|
+
* Supports:
|
|
10
|
+
* - Per-provider linear calibration (offset + scale)
|
|
11
|
+
* - User-supplied calibration profiles
|
|
12
|
+
* - Score histogram analysis for drift detection
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { warn } from './logger.mjs';
|
|
16
|
+
import { ValidationError } from './errors.mjs';
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Default calibration profiles per provider.
|
|
20
|
+
*
|
|
21
|
+
* These are initial estimates based on observed tendencies.
|
|
22
|
+
* Users should override with their own profiles via calibrate()
|
|
23
|
+
* after running createCalibrationSuite().
|
|
24
|
+
*
|
|
25
|
+
* Format: { offset, scale } where calibrated = (raw + offset) * scale
|
|
26
|
+
* Then clamped to [0, 10].
|
|
27
|
+
*/
|
|
28
|
+
const DEFAULT_PROFILES = {
|
|
29
|
+
gemini: { offset: 0, scale: 1.0 },
|
|
30
|
+
openai: { offset: 0, scale: 1.0 },
|
|
31
|
+
claude: { offset: 0, scale: 1.0 },
|
|
32
|
+
groq: { offset: 0, scale: 1.0 },
|
|
33
|
+
openrouter: { offset: 0, scale: 1.0 }
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
// User-supplied profiles override defaults
|
|
37
|
+
let userProfiles = {};
|
|
38
|
+
|
|
39
|
+
/**
|
|
40
|
+
* Set calibration profile for a provider
|
|
41
|
+
*
|
|
42
|
+
* @param {string} provider - Provider name
|
|
43
|
+
* @param {{ offset: number, scale: number }} profile - Calibration profile
|
|
44
|
+
*/
|
|
45
|
+
export function setCalibrationProfile(provider, profile) {
|
|
46
|
+
if (typeof profile.offset !== 'number' || typeof profile.scale !== 'number') {
|
|
47
|
+
throw new ValidationError('Calibration profile must have numeric offset and scale', { offset: typeof profile.offset, scale: typeof profile.scale });
|
|
48
|
+
}
|
|
49
|
+
if (profile.scale <= 0) {
|
|
50
|
+
throw new ValidationError('Calibration scale must be positive', { scale: profile.scale });
|
|
51
|
+
}
|
|
52
|
+
userProfiles[provider] = { ...profile };
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* Get calibration profile for a provider
|
|
57
|
+
*
|
|
58
|
+
* @param {string} provider - Provider name
|
|
59
|
+
* @returns {{ offset: number, scale: number }} Calibration profile
|
|
60
|
+
*/
|
|
61
|
+
export function getCalibrationProfile(provider) {
|
|
62
|
+
return userProfiles[provider] || DEFAULT_PROFILES[provider] || { offset: 0, scale: 1.0 };
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Reset all calibration profiles to defaults
|
|
67
|
+
*/
|
|
68
|
+
export function resetCalibrationProfiles() {
|
|
69
|
+
userProfiles = {};
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/**
|
|
73
|
+
* Calibrate a raw score using the provider's profile
|
|
74
|
+
*
|
|
75
|
+
* @param {number | null} score - Raw score from VLM (0-10)
|
|
76
|
+
* @param {string} provider - Provider name
|
|
77
|
+
* @returns {number | null} Calibrated score (0-10), or null if input is null
|
|
78
|
+
*/
|
|
79
|
+
export function calibrateScore(score, provider) {
|
|
80
|
+
if (score === null || score === undefined) {
|
|
81
|
+
return null;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
const profile = getCalibrationProfile(provider);
|
|
85
|
+
const calibrated = (score + profile.offset) * profile.scale;
|
|
86
|
+
|
|
87
|
+
// Clamp to [0, 10]
|
|
88
|
+
return Math.max(0, Math.min(10, Math.round(calibrated * 100) / 100));
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Derive a calibration profile from labeled data
|
|
93
|
+
*
|
|
94
|
+
* Given pairs of (raw VLM score, expected score), computes the
|
|
95
|
+
 * least-squares linear fit, returned in profile form: expected ≈ (raw + offset) * scale.
|
|
96
|
+
*
|
|
97
|
+
* @param {Array<{ raw: number, expected: number }>} pairs - Score pairs
|
|
98
|
+
* @returns {{ offset: number, scale: number, r2: number }} Calibration profile with fit quality
|
|
99
|
+
*/
|
|
100
|
+
export function deriveCalibrationProfile(pairs) {
|
|
101
|
+
if (!Array.isArray(pairs) || pairs.length < 2) {
|
|
102
|
+
throw new ValidationError('Need at least 2 (raw, expected) pairs to derive calibration', { count: pairs?.length ?? 0 });
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
const n = pairs.length;
|
|
106
|
+
let sumX = 0, sumY = 0, sumXX = 0, sumXY = 0, sumYY = 0;
|
|
107
|
+
|
|
108
|
+
for (const { raw, expected } of pairs) {
|
|
109
|
+
sumX += raw;
|
|
110
|
+
sumY += expected;
|
|
111
|
+
sumXX += raw * raw;
|
|
112
|
+
sumXY += raw * expected;
|
|
113
|
+
sumYY += expected * expected;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
const denom = n * sumXX - sumX * sumX;
|
|
117
|
+
|
|
118
|
+
if (Math.abs(denom) < 1e-10) {
|
|
119
|
+
warn('[Calibration] All raw scores are identical; cannot derive profile');
|
|
120
|
+
return { offset: 0, scale: 1.0, r2: 0 };
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
// Linear regression: expected = scale * raw + offset_intercept
|
|
124
|
+
// We want calibrated = (raw + offset) * scale, so:
|
|
125
|
+
// calibrated = raw * scale + offset * scale
|
|
126
|
+
// Matching: scale = slope, offset_intercept = offset * scale -> offset = intercept / scale
|
|
127
|
+
const slope = (n * sumXY - sumX * sumY) / denom;
|
|
128
|
+
const intercept = (sumY - slope * sumX) / n;
|
|
129
|
+
|
|
130
|
+
// Convert to our format: calibrated = (raw + offset) * scale
|
|
131
|
+
const scale = slope || 1.0;
|
|
132
|
+
const offset = scale !== 0 ? intercept / scale : 0;
|
|
133
|
+
|
|
134
|
+
// R-squared
|
|
135
|
+
const meanY = sumY / n;
|
|
136
|
+
const ssTot = sumYY - n * meanY * meanY;
|
|
137
|
+
const ssRes = pairs.reduce((sum, { raw, expected }) => {
|
|
138
|
+
const predicted = raw * slope + intercept;
|
|
139
|
+
return sum + (expected - predicted) ** 2;
|
|
140
|
+
}, 0);
|
|
141
|
+
const r2 = ssTot > 0 ? 1 - ssRes / ssTot : 0;
|
|
142
|
+
|
|
143
|
+
return { offset, scale, r2 };
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Analyze score distribution for a provider to detect drift
|
|
148
|
+
*
|
|
149
|
+
* @param {number[]} scores - Array of scores from a single provider
|
|
150
|
+
* @returns {{ mean: number, stddev: number, skew: number, histogram: Record<number, number> }}
|
|
151
|
+
*/
|
|
152
|
+
export function analyzeScoreDistribution(scores) {
|
|
153
|
+
if (!scores.length) {
|
|
154
|
+
return { mean: 0, stddev: 0, skew: 0, histogram: {} };
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
const n = scores.length;
|
|
158
|
+
const mean = scores.reduce((a, b) => a + b, 0) / n;
|
|
159
|
+
|
|
160
|
+
const variance = scores.reduce((sum, s) => sum + (s - mean) ** 2, 0) / n;
|
|
161
|
+
const stddev = Math.sqrt(variance);
|
|
162
|
+
|
|
163
|
+
// Skewness (Fisher's)
|
|
164
|
+
const skew = stddev > 0
|
|
165
|
+
? scores.reduce((sum, s) => sum + ((s - mean) / stddev) ** 3, 0) / n
|
|
166
|
+
: 0;
|
|
167
|
+
|
|
168
|
+
// Histogram (integer buckets 0-10)
|
|
169
|
+
const histogram = {};
|
|
170
|
+
for (let i = 0; i <= 10; i++) histogram[i] = 0;
|
|
171
|
+
for (const s of scores) {
|
|
172
|
+
const bucket = Math.max(0, Math.min(10, Math.round(s)));
|
|
173
|
+
histogram[bucket]++;
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
return { mean, stddev, skew, histogram };
|
|
177
|
+
}
|