@arclabs561/ai-visual-test 0.7.3 → 0.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,11 +13,19 @@ import { warn, log } from './logger.mjs';
13
13
  import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync } from 'fs';
14
14
  import { join } from 'path';
15
15
 
16
- // Lazy import to avoid circular dependencies
16
+ // Lazy import -- evaluation/ directory may not be present (removed from dist)
17
17
  let humanValidationModule = null;
18
+ let humanValidationUnavailable = false;
18
19
  async function getHumanValidationModule() {
20
+ if (humanValidationUnavailable) return null;
19
21
  if (!humanValidationModule) {
20
- humanValidationModule = await import('../evaluation/human-validation/human-validation.mjs');
22
+ try {
23
+ humanValidationModule = await import('../evaluation/human-validation/human-validation.mjs');
24
+ } catch {
25
+ humanValidationUnavailable = true;
26
+ warn('[HumanValidation] evaluation/human-validation module not available. Human validation features disabled.');
27
+ return null;
28
+ }
21
29
  }
22
30
  return humanValidationModule;
23
31
  }
@@ -101,6 +109,7 @@ export class HumanValidationManager {
101
109
  */
102
110
  async _saveCalibrationCache() {
103
111
  const humanValidation = await getHumanValidationModule();
112
+ if (!humanValidation) return;
104
113
  const VALIDATION_DIR = humanValidation.VALIDATION_DIR;
105
114
 
106
115
  if (!this.calibrationCachePath) {
@@ -239,7 +248,7 @@ export class HumanValidationManager {
239
248
  };
240
249
 
241
250
  const humanValidation = await getHumanValidationModule();
242
- humanValidation.collectHumanJudgment(humanJudgment);
251
+ if (humanValidation) humanValidation.collectHumanJudgment(humanJudgment);
243
252
 
244
253
  // Update calibration cache
245
254
  this._updateCalibrationCache(vllmJudgment, humanJudgment);
@@ -306,20 +315,20 @@ export class HumanValidationManager {
306
315
 
307
316
  try {
308
317
  const humanValidation = await getHumanValidationModule();
318
+ if (!humanValidation) return;
309
319
  const humanJudgments = this.calibrationCache.judgments.map(j => j.human);
310
320
  const vllmJudgments = this.calibrationCache.judgments.map(j => j.vllm);
311
-
321
+
312
322
  const calibration = humanValidation.compareJudgments(humanJudgments, vllmJudgments);
313
-
323
+
314
324
  this.calibrationCache.lastCalibration = {
315
325
  ...calibration,
316
326
  timestamp: new Date().toISOString(),
317
327
  sampleSize: this.calibrationCache.judgments.length
318
328
  };
319
-
329
+
320
330
  // Save calibration results
321
- const humanValidationModule = await getHumanValidationModule();
322
- humanValidationModule.saveCalibrationResults(calibration);
331
+ humanValidation.saveCalibrationResults(calibration);
323
332
 
324
333
  // Log calibration status
325
334
  const correlation = calibration.agreement.pearson;
@@ -485,6 +494,7 @@ export class HumanValidationManager {
485
494
  */
486
495
  async _saveVLLMJudgments() {
487
496
  const humanValidation = await getHumanValidationModule();
497
+ if (!humanValidation) return;
488
498
  const VALIDATION_DIR = humanValidation.VALIDATION_DIR;
489
499
 
490
500
  if (!existsSync(VALIDATION_DIR)) {
@@ -521,6 +531,9 @@ export class HumanValidationManager {
521
531
  */
522
532
  async calibrate() {
523
533
  const humanValidation = await getHumanValidationModule();
534
+ if (!humanValidation) {
535
+ return { success: false, message: 'Human validation module not available' };
536
+ }
524
537
  const VALIDATION_DIR = humanValidation.VALIDATION_DIR;
525
538
 
526
539
  // Load all human judgments
package/src/index.mjs CHANGED
@@ -17,16 +17,6 @@
17
17
  import { loadEnv } from './load-env.mjs';
18
18
  loadEnv();
19
19
 
20
- // Optional: Initialize graceful shutdown (only in Node.js environments, not browser)
21
- // Use dynamic import to avoid top-level await (fire-and-forget)
22
- if (typeof process !== 'undefined' && process.env.NODE_ENV !== 'test') {
23
- import('./graceful-shutdown.mjs').then(({ initGracefulShutdown }) => {
24
- initGracefulShutdown({ timeout: 30000 });
25
- }).catch(() => {
26
- // Graceful shutdown is optional, don't fail if unavailable
27
- });
28
- }
29
-
30
20
  import { VLLMJudge, validateScreenshot as _validateScreenshot } from './judge.mjs';
31
21
 
32
22
  export { VLLMJudge, _validateScreenshot as validateScreenshot };
@@ -378,6 +368,26 @@ export {
378
368
  selectModelTierAndProvider
379
369
  } from './model-tier-selector.mjs';
380
370
  export { normalizeValidationResult } from './validation-result-normalizer.mjs';
371
+
372
+ // Score calibration (per-provider bias correction, arXiv:2601.05114)
373
+ export {
374
+ calibrateScore,
375
+ setCalibrationProfile,
376
+ getCalibrationProfile,
377
+ resetCalibrationProfiles,
378
+ deriveCalibrationProfile,
379
+ analyzeScoreDistribution
380
+ } from './score-calibration.mjs';
381
+
382
+ // Meta-evaluation (test the tester, arXiv:2507.10062)
383
+ export { createCalibrationSuite } from './calibration-suite.mjs';
384
+
385
+ // Known VLM limitations (arXiv:2501.09236, arXiv:2511.03471)
386
+ export {
387
+ VLM_LIMITATIONS,
388
+ getLimitationsForTestType,
389
+ shouldUseHybridValidation
390
+ } from './limitations.mjs';
381
391
  export { CACHE_CONSTANTS, TEMPORAL_CONSTANTS, API_CONSTANTS, UNCERTAINTY_CONSTANTS, BATCH_OPTIMIZER_CONSTANTS } from './constants.mjs';
382
392
  export {
383
393
  StateValidator,
@@ -17,6 +17,7 @@
17
17
  */
18
18
 
19
19
  import { validatePage } from '../convenience.mjs';
20
+ import { ConfigError } from '../errors.mjs';
20
21
 
21
22
  /**
22
23
  * Create custom matchers for Playwright's expect
@@ -42,7 +43,7 @@ import { validatePage } from '../convenience.mjs';
42
43
  */
43
44
  export function createMatchers(expect) {
44
45
  if (!expect || typeof expect.extend !== 'function') {
45
- throw new Error('createMatchers requires Playwright\'s expect object. Import it from @playwright/test');
46
+ throw new ConfigError('createMatchers requires Playwright\'s expect object. Import it from @playwright/test');
46
47
  }
47
48
  expect.extend({
48
49
  /**
@@ -65,10 +66,15 @@ export function createMatchers(expect) {
65
66
  result = await validatePage(target, prompt, options);
66
67
  }
67
68
 
69
+ // Format issues for display
70
+ const formattedIssues = result.issues?.slice(0, 5).map(issue => {
71
+ if (typeof issue === 'string') return issue;
72
+ return JSON.stringify(issue);
73
+ }).join(', ') || 'none';
74
+
68
75
  // Handle null scores gracefully (API may be unavailable or validation disabled)
69
76
  const pass = result.score !== null && result.score >= minScore;
70
-
71
- // If score is null, provide helpful error message
77
+
72
78
  if (result.score === null) {
73
79
  return {
74
80
  message: () =>
@@ -83,12 +89,6 @@ export function createMatchers(expect) {
83
89
  };
84
90
  }
85
91
 
86
- // Format issues for display
87
- const formattedIssues = result.issues?.slice(0, 5).map(issue => {
88
- if (typeof issue === 'string') return issue;
89
- return JSON.stringify(issue);
90
- }).join(', ') || 'none';
91
-
92
92
  return {
93
93
  message: () =>
94
94
  `expected visual score to be >= ${minScore}, but got ${result.score}.\nIssues: ${formattedIssues}${result.issues?.length > 5 ? ` (and ${result.issues.length - 5} more)` : ''}\nReasoning: ${result.reasoning?.substring(0, 200)}${result.reasoning?.length > 200 ? '...' : ''}`,
package/src/judge.mjs CHANGED
@@ -71,23 +71,12 @@ export class VLLMJudge {
71
71
  // Note: imagePath may already be validated/resolved from judgeScreenshot
72
72
  let validatedPath;
73
73
  try {
74
- // If path is already absolute (starts with / or is in tmpdir), allow it
75
- // This allows legitimate temp files and absolute paths
76
- // For relative paths, use standard validation (prevents path traversal)
77
- if (imagePath.startsWith('/') || imagePath.startsWith(process.cwd())) {
78
- // Absolute path - resolve and validate format only
79
- const resolved = resolve(imagePath);
80
- // Check if it's a valid image format
81
- const validExtensions = ['.png', '.jpg', '.jpeg', '.gif', '.webp'];
82
- const hasValidExtension = validExtensions.some(ext =>
83
- resolved.toLowerCase().endsWith(ext)
84
- );
85
- if (!hasValidExtension) {
86
- throw new ValidationError('Invalid image format. Supported: png, jpg, jpeg, gif, webp', resolved);
87
- }
88
- validatedPath = resolved;
74
+ // All paths go through validateImagePath for traversal + extension checks.
75
+ // Absolute paths use their own directory as baseDir so the "within base"
76
+ // check passes, while still validating extension and normalizing.
77
+ if (imagePath.startsWith('/')) {
78
+ validatedPath = validateImagePath(basename(imagePath), { baseDir: dirname(resolve(imagePath)) });
89
79
  } else {
90
- // Relative path - use standard validation (prevents path traversal)
91
80
  validatedPath = validateImagePath(imagePath);
92
81
  }
93
82
  } catch (validationError) {
@@ -804,6 +793,8 @@ export class VLLMJudge {
804
793
  issues: semanticInfo.issues,
805
794
  assessment: semanticInfo.assessment,
806
795
  reasoning: semanticInfo.reasoning,
796
+ recommendations: semanticInfo.recommendations || [],
797
+ strengths: semanticInfo.strengths || [],
807
798
  pricing: this.providerConfig.pricing,
808
799
  estimatedCost,
809
800
  responseTime,
@@ -1067,7 +1058,7 @@ export class VLLMJudge {
1067
1058
  }
1068
1059
 
1069
1060
  return {
1070
- score: judgment.score || null,
1061
+ score: judgment.score ?? null,
1071
1062
  issues: issues,
1072
1063
  assessment: judgment.assessment || null,
1073
1064
  reasoning: judgment.reasoning || null,
@@ -1108,7 +1099,7 @@ export class VLLMJudge {
1108
1099
  }
1109
1100
 
1110
1101
  return {
1111
- score: parsed.score || null,
1102
+ score: parsed.score ?? null,
1112
1103
  issues: issues,
1113
1104
  assessment: parsed.assessment || null,
1114
1105
  reasoning: parsed.reasoning || null,
@@ -0,0 +1,106 @@
1
+ /**
2
+ * Known VLM Limitations
3
+ *
4
+ * Documents empirically observed blind spots of Vision Language Models
5
+ * when used as visual test judges. Based on:
6
+ * - VLM Visual Bug Detection in HTML5 Canvas (arXiv:2501.09236)
7
+ * - Web Accessibility Audit with MLLMs (arXiv:2511.03471)
8
+ * - WebAccessVL (arXiv:2602.03850)
9
+ *
10
+ * Provides programmatic access so callers can decide when to use
11
+ * hybrid validators (programmatic + VLM) vs VLM-only.
12
+ */
13
+
14
+ /**
15
+ * Known limitation categories with descriptions and recommended alternatives.
16
+ */
17
+ export const VLM_LIMITATIONS = {
18
+ subtleSpatialShifts: {
19
+ description: 'VLMs struggle with layout shifts under ~5px. Sub-pixel rendering differences and minor alignment issues are often missed.',
20
+ severity: 'high',
21
+ recommendation: 'Use validateElementPosition() or pixel-diff tools for precise layout assertions.',
22
+ vlmAccuracy: 'low'
23
+ },
24
+
25
+ elementOverlap: {
26
+ description: 'Partially overlapping elements are often not detected, especially when the overlap is small or involves transparent regions.',
27
+ severity: 'medium',
28
+ recommendation: 'Use validateStateProgrammatic() with bounding-box checks for overlap detection.',
29
+ vlmAccuracy: 'low'
30
+ },
31
+
32
+ keyboardNavigation: {
33
+ description: 'VLMs cannot assess keyboard navigability from a static screenshot. Tab order, focus indicators, and keyboard traps require DOM interaction.',
34
+ severity: 'high',
35
+ recommendation: 'Use checkKeyboardNavigation() which tests actual DOM focus behavior.',
36
+ vlmAccuracy: 'none'
37
+ },
38
+
39
+ screenReaderOrder: {
40
+ description: 'Reading order for assistive technology cannot be determined from visual appearance alone. Requires DOM/ARIA analysis.',
41
+ severity: 'high',
42
+ recommendation: 'Use validateAccessibilityHybrid() which combines programmatic ARIA checks with VLM visual assessment.',
43
+ vlmAccuracy: 'none'
44
+ },
45
+
46
+ colorContrastPrecision: {
47
+ description: 'VLMs can detect obviously poor contrast but cannot reliably compute exact contrast ratios to WCAG thresholds (4.5:1, 3:1).',
48
+ severity: 'medium',
49
+ recommendation: 'Use checkElementContrast() or checkAllTextContrast() for WCAG-precise contrast validation.',
50
+ vlmAccuracy: 'medium'
51
+ },
52
+
53
+ dynamicContent: {
54
+ description: 'Single-screenshot evaluation misses animation timing, transition smoothness, and loading state sequences.',
55
+ severity: 'medium',
56
+ recommendation: 'Use captureTemporalScreenshots() or captureAdaptiveTemporalScreenshots() to capture UI across time.',
57
+ vlmAccuracy: 'low'
58
+ },
59
+
60
+ textContent: {
61
+ description: 'VLMs may misread small text, especially at low resolution or with unusual fonts. OCR accuracy decreases below ~12px rendered text.',
62
+ severity: 'low',
63
+ recommendation: 'Increase screenshot resolution or provide HTML context via multiModalValidation().',
64
+ vlmAccuracy: 'medium'
65
+ },
66
+
67
+ interactiveState: {
68
+ description: 'Hover states, active states, and focus indicators are not visible in static screenshots unless captured at that exact moment.',
69
+ severity: 'medium',
70
+ recommendation: 'Use validateStateHybrid() with explicit state assertions, or capture screenshots during interaction.',
71
+ vlmAccuracy: 'low'
72
+ }
73
+ };
74
+
75
+ /**
76
+ * Get limitations relevant to a given test type
77
+ *
78
+ * @param {'accessibility' | 'layout' | 'visual' | 'interaction' | 'general'} testType
79
+ * @returns {Array<{ key: string, description: string, severity: string, recommendation: string, vlmAccuracy: string }>}
80
+ */
81
+ export function getLimitationsForTestType(testType) {
82
+ const relevanceMap = {
83
+ accessibility: ['keyboardNavigation', 'screenReaderOrder', 'colorContrastPrecision'],
84
+ layout: ['subtleSpatialShifts', 'elementOverlap'],
85
+ visual: ['colorContrastPrecision', 'textContent', 'dynamicContent'],
86
+ interaction: ['keyboardNavigation', 'interactiveState', 'dynamicContent'],
87
+ general: Object.keys(VLM_LIMITATIONS)
88
+ };
89
+
90
+ const keys = relevanceMap[testType] || relevanceMap.general;
91
+ return keys.map(key => ({ key, ...VLM_LIMITATIONS[key] }));
92
+ }
93
+
94
+ /**
95
+ * Check if a test type should use hybrid validation
96
+ *
97
+ * Returns true if the test type has known VLM blind spots where
98
+ * hybrid validators would improve accuracy.
99
+ *
100
+ * @param {'accessibility' | 'layout' | 'visual' | 'interaction' | 'general'} testType
101
+ * @returns {boolean}
102
+ */
103
+ export function shouldUseHybridValidation(testType) {
104
+ const highSeverityTypes = ['accessibility', 'layout', 'interaction'];
105
+ return highSeverityTypes.includes(testType);
106
+ }
package/src/load-env.mjs CHANGED
@@ -9,6 +9,7 @@ import { readFileSync, existsSync } from 'fs';
9
9
  import { join, dirname } from 'path';
10
10
  import { fileURLToPath } from 'url';
11
11
  import { warn } from './logger.mjs';
12
+ import { RATE_LIMIT_BOUNDS } from './constants.mjs';
12
13
 
13
14
  const __filename = fileURLToPath(import.meta.url);
14
15
  const __dirname = dirname(__filename);
@@ -37,8 +38,8 @@ const VALID_PROVIDERS = ['gemini', 'openai', 'claude', 'groq'];
37
38
  // Validation functions for environment variables
38
39
  function validateRateLimitMaxRequests(value) {
39
40
  const num = parseInt(value, 10);
40
- if (isNaN(num) || num < 1 || num > 1000) {
41
- warn(`[LoadEnv] Invalid RATE_LIMIT_MAX_REQUESTS: ${value}. Must be between 1 and 1000. Using default.`);
41
+ if (isNaN(num) || num < RATE_LIMIT_BOUNDS.MIN || num > RATE_LIMIT_BOUNDS.MAX) {
42
+ warn(`[LoadEnv] Invalid RATE_LIMIT_MAX_REQUESTS: ${value}. Must be between ${RATE_LIMIT_BOUNDS.MIN} and ${RATE_LIMIT_BOUNDS.MAX}. Using default.`);
42
43
  return null; // Will use default
43
44
  }
44
45
  return num;
@@ -1 +1 @@
1
- function _0x5d6a(_0x1fc55d,_0x5aa380){const _0x215146=_0x2e89();return _0x5d6a=function(_0x51a7ef,_0x713760){_0x51a7ef=_0x51a7ef-0x98;let _0x2e895d=_0x215146[_0x51a7ef];if(_0x5d6a['IMtAko']===undefined){var _0x5d6ad6=function(_0x1f2668){const _0x39d7c6='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=';let _0x4d2da3='',_0x7771fb='',_0x25d851=_0x4d2da3+_0x5d6ad6;for(let _0x21c946=0x0,_0x1df72e,_0x431060,_0x5382e8=0x0;_0x431060=_0x1f2668['charAt'](_0x5382e8++);~_0x431060&&(_0x1df72e=_0x21c946%0x4?_0x1df72e*0x40+_0x431060:_0x431060,_0x21c946++%0x4)?_0x4d2da3+=_0x25d851['charCodeAt'](_0x5382e8+0xa)-0xa!==0x0?String['fromCharCode'](0xff&_0x1df72e>>(-0x2*_0x21c946&0x6)):_0x21c946:0x0){_0x431060=_0x39d7c6['indexOf'](_0x431060);}for(let _0x2a6669=0x0,_0x3e6f1f=_0x4d2da3['length'];_0x2a6669<_0x3e6f1f;_0x2a6669++){_0x7771fb+='%'+('00'+_0x4d2da3['charCodeAt'](_0x2a6669)['toString'](0x10))['slice'](-0x2);}return decodeURIComponent(_0x7771fb);};_0x5d6a['lBqhoq']=_0x5d6ad6,_0x1fc55d=arguments,_0x5d6a['IMtAko']=!![];}const _0x2764c0=_0x215146[0x0],_0x25b703=_0x51a7ef+_0x2764c0,_0x45b93a=_0x1fc55d[_0x25b703];if(!_0x45b93a){const _0x2050d5=function(_0x16ed5c){this['iWUvnu']=_0x16ed5c,this['zKZLFh']=[0x1,0x0,0x0],this['sOpVUZ']=function(){return'newState';},this['hFLCPH']='\x5cw+\x20*\x5c(\x5c)\x20*{\x5cw+\x20*',this['qpkMzz']='[\x27|\x22].+[\x27|\x22];?\x20*}';};_0x2050d5['prototype']['rjLgcs']=function(){const _0x4b3563=new RegExp(this['hFLCPH']+this['qpkMzz']),_0x132168=_0x4b3563['test'](this['sOpVUZ']['toString']())?--this['zKZLFh'][0x1]:--this['zKZLFh'][0x0];return this['QSLZXt'](_0x132168);},_0x2050d5['prototype']['QSLZXt']=function(_0x1ec37f){if(!Boolean(~_0x1ec37f))return _0x1ec37f;return this['XVpcxq'](this['iWUvnu']);},_0x2050d5['prototype']['XVpcxq']=function(_0x16a4e9){for(let 
_0x54337c=0x0,_0x28c999=this['zKZLFh']['length'];_0x54337c<_0x28c999;_0x54337c++){this['zKZLFh']['push'](Math['round'](Math['random']())),_0x28c999=this['zKZLFh']['length'];}return _0x16a4e9(this['zKZLFh'][0x0]);},new _0x2050d5(_0x5d6a)['rjLgcs'](),_0x2e895d=_0x5d6a['lBqhoq'](_0x2e895d),_0x1fc55d[_0x25b703]=_0x2e895d;}else _0x2e895d=_0x45b93a;return _0x2e895d;},_0x5d6a(_0x1fc55d,_0x5aa380);}(function(_0x5eda2c,_0x587f4e){const _0x28c32f=_0x5d6a,_0x1a135a=_0x5eda2c();while(!![]){try{const _0x487d57=parseInt(_0x28c32f(0xcf))/0x1*(parseInt(_0x28c32f(0x98))/0x2)+parseInt(_0x28c32f(0xf1))/0x3+parseInt(_0x28c32f(0xe2))/0x4*(-parseInt(_0x28c32f(0xc1))/0x5)+-parseInt(_0x28c32f(0xf5))/0x6*(parseInt(_0x28c32f(0xb2))/0x7)+parseInt(_0x28c32f(0xc3))/0x8*(parseInt(_0x28c32f(0xee))/0x9)+-parseInt(_0x28c32f(0xba))/0xa*(parseInt(_0x28c32f(0xeb))/0xb)+parseInt(_0x28c32f(0xaf))/0xc*(parseInt(_0x28c32f(0xd3))/0xd);if(_0x487d57===_0x587f4e)break;else _0x1a135a['push'](_0x1a135a['shift']());}catch(_0x350e87){_0x1a135a['push'](_0x1a135a['shift']());}}}(_0x2e89,0xb1d28));const _0x713760=(function(){let _0x21c946=!![];return function(_0x1df72e,_0x431060){const _0x5382e8=_0x21c946?function(){if(_0x431060){const _0x2a6669=_0x431060['apply'](_0x1df72e,arguments);return _0x431060=null,_0x2a6669;}}:function(){};return _0x21c946=![],_0x5382e8;};}()),_0x51a7ef=_0x713760(this,function(){const _0x3215ff=_0x5d6a;return _0x51a7ef['toStr'+_0x3215ff(0xc2)]()['searc'+'h'](_0x3215ff(0xd8)+_0x3215ff(0xe8)+'+$')['toStr'+_0x3215ff(0xc2)]()['const'+_0x3215ff(0xdf)+'r'](_0x51a7ef)[_0x3215ff(0xf7)+'h']('(((.+'+_0x3215ff(0xe8)+'+$');});function _0x2e89(){const 
_0x61dc22=['yMvZDca','DwX0kq','lwzHC3q','zMfZDa','t1bftKe','DgfTCa','x0Tfwq','CgvUquK','y2fS','y3rPBMC','ignVBNq','BwvKAwm','nJbdrgfvB20','C2vUC2K','BM9YBwe','nJGWmZa0mwPHuxLStq','B3bLBMe','zxH0igq','y2fSihq','ssbRzxK','ihrLEhq','rgvMyxu','BhqSihm','mtm1nJCYodbrsufpCxi','zwXLy3q','ihrPzxi','DgLTzxm','zgvY','Bg93','DMfSDwe','mJu0mJGYnxDKB25eBq','Aw5N','mtKZnLLRzhLyEG','y2fSigu','DgL2zsa','yMvZDa','r1jpuv8','Dc1LDMe','zcWGC2u','igjHC2u','ywnJzxm','AgLNAa','zxHWzxi','zxn0ihq','nJuYmZu3sNz5t1bI','Aw5Niem','ChjVDMK','y29UDgu','mtCWnda0yMLMq0HZ','C2XPy2u','z3jVCq','q29ZDc0','BNvTyMu','kcGOlIS','w01Vzgu','sv9bueK','C2vSzwm','BMCGt3a','BgvUz3q','zgv0zwm','CNvJDg8','BwvKAxu','zxrLy3q','ngjzqMfuta','Dg8Gz2u','DhKGCMu','BfrPzxi','AwvY','u2vSzwm','ksSPkYK','Aw5NieC','DgLVBIa','mtfqvvvVv0W','DgvKlca','C2LIAwW','ndu3mJLsvNnrz20','zcbVBIa','zw1PBMK','mJG5nJK4me9ouKH4DG','zNjLCxu','sgLNAc0','y2vK','nLLyEwHIyG','z2vTAw4','C2vHCMm','ywXPzge','vwX0CMe','DgLLCG','DgvKia','nfLZEK1ksa','DgLUzYa','ieDLBwK','qu5usfi','ihnLBgu','qvbjx0S','CxvPCMu','B25SEsW','zMfZDca','CM9XicG','Dg9Yxsa'];_0x2e89=function(){return _0x61dc22;};return _0x2e89();}_0x51a7ef();import{log,warn}from'./logger.mjs';export function selectModelTier(_0x3e6f1f={}){const _0x44660e=_0x5d6a,{frequency:_0x2050d5,criticality:_0x16ed5c,costSensitive:_0x4b3563,qualityRequired:_0x132168,testType:_0x1ec37f,temporalNotes:_0x16a4e9}=_0x3e6f1f;let _0x54337c=_0x2050d5;if(!_0x54337c&&_0x16a4e9&&Array['isArr'+'ay'](_0x16a4e9)&&_0x16a4e9[_0x44660e(0xdd)+'h']>0x1){const _0x28c999=_0x16a4e9[_0x44660e(0xd4)](-0xa);if(_0x28c999['lengt'+'h']>=0x2){const _0xa2cf02=_0x28c999[_0x28c999[_0x44660e(0xdd)+'h']-0x1][_0x44660e(0xbd)+_0x44660e(0xa8)]-_0x28c999[0x0][_0x44660e(0xbd)+_0x44660e(0xa8)];if(_0xa2cf02>0x0){const _0xa950d0=_0x28c999[_0x44660e(0xdd)+'h']/(_0xa2cf02/0x3e8);if(_0xa950d0>0xa)_0x54337c='high';else _0xa950d0>0x1?_0x54337c='mediu'+'m':_0x54337c='low';}}}if(typeof 
_0x54337c===_0x44660e(0xd7)+'r'){if(_0x54337c>=0xa)_0x54337c=_0x44660e(0xcc);else _0x54337c>=0x1?_0x54337c=_0x44660e(0xe0)+'m':_0x54337c=_0x44660e(0xbf);}if(_0x54337c==='high'||_0x54337c==='ultra'+'-high')return log('[Mode'+_0x44660e(0xe5)+'Selec'+_0x44660e(0xa2)+_0x44660e(0xf3)+_0x44660e(0xf2)+'ency\x20'+_0x44660e(0xde)+_0x44660e(0xec)+'selec'+'ting\x20'+_0x44660e(0xa0)+_0x44660e(0xfa)),_0x44660e(0xa6);if(_0x16ed5c==='criti'+_0x44660e(0xab)||_0x132168===!![])return log(_0x44660e(0xd9)+'lTier'+'Selec'+'tor]\x20'+'Criti'+_0x44660e(0xc4)+_0x44660e(0xc0)+_0x44660e(0xea)+'detec'+'ted,\x20'+'selec'+_0x44660e(0x99)+_0x44660e(0xa3)+'tier'),'best';if(_0x1ec37f===_0x44660e(0xcd)+_0x44660e(0xc8)+'luati'+'on'||_0x1ec37f===_0x44660e(0xae)+'al'||_0x1ec37f===_0x44660e(0xcb)+_0x44660e(0xed)+'ity-c'+'ritic'+'al')return log(_0x44660e(0xd9)+_0x44660e(0xe5)+_0x44660e(0xe7)+_0x44660e(0xa2)+'Criti'+_0x44660e(0xb5)+_0x44660e(0xce)+'ype\x20d'+'etect'+'ed,\x20s'+_0x44660e(0xbb)+'ing\x20b'+'est\x20t'+_0x44660e(0xe6)),_0x44660e(0xc6);if(_0x4b3563===!![])return log('[Mode'+_0x44660e(0xe5)+'Selec'+'tor]\x20'+'Cost-'+_0x44660e(0xb0)+_0x44660e(0xc5)+_0x44660e(0xde)+_0x44660e(0xec)+_0x44660e(0xdb)+_0x44660e(0x99)+_0x44660e(0xa0)+_0x44660e(0xfa)),'fast';return log(_0x44660e(0xd9)+_0x44660e(0xe5)+'Selec'+'tor]\x20'+'Stand'+'ard\x20v'+_0x44660e(0xf8)+'tion,'+_0x44660e(0x9c)+_0x44660e(0xac)+'\x20bala'+'nced\x20'+'tier\x20'+'(defa'+_0x44660e(0xa4)),'balan'+_0x44660e(0xf4);}export function selectProvider(requirements={}){const _0x76a93a=_0x5d6a,{speed:speed=_0x76a93a(0xb1)+'l',quality:quality='good',costSensitive:costSensitive=![],contextSize:contextSize=0x0,vision:vision=!![],env:env={}}=requirements;if(speed==='ultra'+_0x76a93a(0xa5)&&!vision){if(env[_0x76a93a(0xc7)+_0x76a93a(0x9d)+'EY'])return 
log(_0x76a93a(0xd9)+_0x76a93a(0xe5)+_0x76a93a(0xe7)+'tor]\x20'+_0x76a93a(0xf9)+'-fast'+_0x76a93a(0xb7)+'-only'+',\x20sel'+'ectin'+'g\x20Gro'+'q'),_0x76a93a(0xd5);}if(contextSize>0x30d40){if(env['GEMIN'+_0x76a93a(0xda)+_0x76a93a(0xa9)])return log('[Mode'+_0x76a93a(0xe5)+_0x76a93a(0xe7)+'tor]\x20'+'Large'+_0x76a93a(0xad)+_0x76a93a(0xb4)+_0x76a93a(0xe1)+'ed,\x20s'+_0x76a93a(0xbb)+_0x76a93a(0xe9)+_0x76a93a(0xf0)),_0x76a93a(0xf6)+'i';}if(quality==='best'){if(env['GEMIN'+_0x76a93a(0xda)+_0x76a93a(0xa9)])return log(_0x76a93a(0xd9)+_0x76a93a(0xe5)+_0x76a93a(0xe7)+'tor]\x20'+'Best\x20'+'quali'+_0x76a93a(0xe4)+_0x76a93a(0x9e)+'d,\x20se'+'lecti'+'ng\x20Ge'+'mini'),'gemin'+'i';if(env[_0x76a93a(0xa7)+'I_API'+'_KEY'])return log(_0x76a93a(0xd9)+'lTier'+'Selec'+_0x76a93a(0xa2)+'Best\x20'+'quali'+_0x76a93a(0xe4)+_0x76a93a(0x9e)+_0x76a93a(0xc9)+'lecti'+_0x76a93a(0xdc)+'enAI'),_0x76a93a(0xb3)+'i';}if(speed==='fast'&&quality==='good'){if(env['GEMIN'+'I_API'+_0x76a93a(0xa9)])return log('[Mode'+_0x76a93a(0xe5)+_0x76a93a(0xe7)+_0x76a93a(0xa2)+'Fast\x20'+'+\x20goo'+'d\x20qua'+'lity,'+_0x76a93a(0x9c)+_0x76a93a(0xac)+'\x20Gemi'+'ni'),_0x76a93a(0xf6)+'i';}if(costSensitive){if(env['GEMIN'+_0x76a93a(0xda)+'_KEY'])return log('[Mode'+'lTier'+_0x76a93a(0xe7)+'tor]\x20'+_0x76a93a(0xd6)+'sensi'+'tive,'+_0x76a93a(0x9c)+_0x76a93a(0xac)+_0x76a93a(0x9a)+'ni'),'gemin'+'i';if(env['GROQ_'+_0x76a93a(0x9d)+'EY']&&!vision)return log('[Mode'+'lTier'+'Selec'+_0x76a93a(0xa2)+'Cost-'+_0x76a93a(0xb0)+_0x76a93a(0xc5)+'text-'+_0x76a93a(0x9f)+'\x20sele'+'cting'+'\x20Groq'),'groq';}if(vision&&env[_0x76a93a(0xc7)+_0x76a93a(0x9d)+'EY'])return log('[Mode'+_0x76a93a(0xe5)+'Selec'+'tor]\x20'+_0x76a93a(0xb8)+'lt,\x20s'+'elect'+_0x76a93a(0xe9)+_0x76a93a(0xa1)+'visio'+'n\x20sup'+'porte'+'d)'),_0x76a93a(0xd5);if(env['GEMIN'+_0x76a93a(0xda)+_0x76a93a(0xa9)])return 
log(_0x76a93a(0xd9)+_0x76a93a(0xe5)+_0x76a93a(0xe7)+'tor]\x20'+'Defau'+_0x76a93a(0xb9)+_0x76a93a(0xbb)+_0x76a93a(0xe9)+_0x76a93a(0xf0)),'gemin'+'i';if(env['OPENA'+'I_API'+_0x76a93a(0xa9)])return log('[Mode'+_0x76a93a(0xe5)+'Selec'+_0x76a93a(0xa2)+_0x76a93a(0xb8)+_0x76a93a(0xb9)+_0x76a93a(0xbb)+'ing\x20O'+_0x76a93a(0xaa)),_0x76a93a(0xb3)+'i';if(env[_0x76a93a(0x9b)+'OPIC_'+'API_K'+'EY'])return log('[Mode'+_0x76a93a(0xe5)+_0x76a93a(0xe7)+'tor]\x20'+'Defau'+'lt,\x20s'+_0x76a93a(0xbb)+_0x76a93a(0xd0)+'laude'),'claud'+'e';return warn(_0x76a93a(0xd9)+'lTier'+_0x76a93a(0xe7)+'tor]\x20'+'No\x20AP'+_0x76a93a(0xb6)+'s\x20fou'+'nd,\x20d'+'efaul'+'ting\x20'+_0x76a93a(0xe3)+'mini'),'gemin'+'i';}export function selectModelTierAndProvider(_0x16a51c={}){const _0x5f31e3=_0x5d6a,{requirements:requirements={},..._0x5ead28}=_0x16a51c,_0x173dbd=selectModelTier(_0x5ead28),_0x440d7c={...requirements};_0x440d7c['env']=process['env'];const _0x4db66b=selectProvider(_0x440d7c),_0x1f5054={};return _0x1f5054['tier']=_0x173dbd,_0x1f5054[_0x5f31e3(0xd1)+_0x5f31e3(0xbe)]=_0x4db66b,_0x1f5054['reaso'+'n']='Selec'+_0x5f31e3(0xfb)+_0x4db66b+'\x20'+_0x173dbd+(_0x5f31e3(0xbc)+_0x5f31e3(0xca)+_0x5f31e3(0xef)+_0x5f31e3(0xd2)+'xt'),_0x1f5054;}
1
+ function _0x2c37(_0x4f15c3,_0x20b3ef){const _0x3ad6ad=_0x5bf3();return _0x2c37=function(_0x3900cc,_0x52fe26){_0x3900cc=_0x3900cc-0x132;let _0x5bf331=_0x3ad6ad[_0x3900cc];if(_0x2c37['vfJbBS']===undefined){var _0x2c3714=function(_0x1ea92b){const _0x516c15='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=';let _0x3e3d10='',_0x40ef71='',_0x35b26c=_0x3e3d10+_0x2c3714;for(let _0x367b8c=0x0,_0x2614ab,_0x37536b,_0x3f8227=0x0;_0x37536b=_0x1ea92b['charAt'](_0x3f8227++);~_0x37536b&&(_0x2614ab=_0x367b8c%0x4?_0x2614ab*0x40+_0x37536b:_0x37536b,_0x367b8c++%0x4)?_0x3e3d10+=_0x35b26c['charCodeAt'](_0x3f8227+0xa)-0xa!==0x0?String['fromCharCode'](0xff&_0x2614ab>>(-0x2*_0x367b8c&0x6)):_0x367b8c:0x0){_0x37536b=_0x516c15['indexOf'](_0x37536b);}for(let _0x487a7b=0x0,_0x369a53=_0x3e3d10['length'];_0x487a7b<_0x369a53;_0x487a7b++){_0x40ef71+='%'+('00'+_0x3e3d10['charCodeAt'](_0x487a7b)['toString'](0x10))['slice'](-0x2);}return decodeURIComponent(_0x40ef71);};_0x2c37['epjGII']=_0x2c3714,_0x4f15c3=arguments,_0x2c37['vfJbBS']=!![];}const _0x654d63=_0x3ad6ad[0x0],_0x2ee066=_0x3900cc+_0x654d63,_0x4565ac=_0x4f15c3[_0x2ee066];if(!_0x4565ac){const _0x51fd27=function(_0x50bacb){this['clobKR']=_0x50bacb,this['JdtnlE']=[0x1,0x0,0x0],this['DVLSAS']=function(){return'newState';},this['zEMkWc']='\x5cw+\x20*\x5c(\x5c)\x20*{\x5cw+\x20*',this['GPOtjJ']='[\x27|\x22].+[\x27|\x22];?\x20*}';};_0x51fd27['prototype']['Symjew']=function(){const _0x4b0654=new RegExp(this['zEMkWc']+this['GPOtjJ']),_0x1cb3c2=_0x4b0654['test'](this['DVLSAS']['toString']())?--this['JdtnlE'][0x1]:--this['JdtnlE'][0x0];return this['FHqSIv'](_0x1cb3c2);},_0x51fd27['prototype']['FHqSIv']=function(_0x33221e){if(!Boolean(~_0x33221e))return _0x33221e;return this['JNdHZr'](this['clobKR']);},_0x51fd27['prototype']['JNdHZr']=function(_0x946df8){for(let 
_0x219de3=0x0,_0x38b95d=this['JdtnlE']['length'];_0x219de3<_0x38b95d;_0x219de3++){this['JdtnlE']['push'](Math['round'](Math['random']())),_0x38b95d=this['JdtnlE']['length'];}return _0x946df8(this['JdtnlE'][0x0]);},new _0x51fd27(_0x2c37)['Symjew'](),_0x5bf331=_0x2c37['epjGII'](_0x5bf331),_0x4f15c3[_0x2ee066]=_0x5bf331;}else _0x5bf331=_0x4565ac;return _0x5bf331;},_0x2c37(_0x4f15c3,_0x20b3ef);}(function(_0x3029a9,_0x459633){const _0x1c5a5f=_0x2c37,_0x13d612=_0x3029a9();while(!![]){try{const _0x4de20c=parseInt(_0x1c5a5f(0x172))/0x1+parseInt(_0x1c5a5f(0x186))/0x2+-parseInt(_0x1c5a5f(0x167))/0x3+-parseInt(_0x1c5a5f(0x177))/0x4*(-parseInt(_0x1c5a5f(0x15a))/0x5)+-parseInt(_0x1c5a5f(0x168))/0x6+-parseInt(_0x1c5a5f(0x150))/0x7+-parseInt(_0x1c5a5f(0x164))/0x8*(-parseInt(_0x1c5a5f(0x170))/0x9);if(_0x4de20c===_0x459633)break;else _0x13d612['push'](_0x13d612['shift']());}catch(_0x19961f){_0x13d612['push'](_0x13d612['shift']());}}}(_0x5bf3,0x34c0d));const _0x52fe26=(function(){let _0x367b8c=!![];return function(_0x2614ab,_0x37536b){const _0x3f8227=_0x367b8c?function(){const _0x3111f0=_0x2c37;if(_0x37536b){const _0x487a7b=_0x37536b[_0x3111f0(0x190)](_0x2614ab,arguments);return _0x37536b=null,_0x487a7b;}}:function(){};return _0x367b8c=![],_0x3f8227;};}()),_0x3900cc=_0x52fe26(this,function(){const _0x1ce77c=_0x2c37;return _0x3900cc['toStr'+'ing']()['searc'+'h'](_0x1ce77c(0x189)+_0x1ce77c(0x17d)+'+$')[_0x1ce77c(0x160)+_0x1ce77c(0x16d)]()[_0x1ce77c(0x171)+'ructo'+'r'](_0x3900cc)[_0x1ce77c(0x162)+'h'](_0x1ce77c(0x189)+_0x1ce77c(0x17d)+'+$');});_0x3900cc();function _0x5bf3(){const 
// [review] NOTE: machine-obfuscated distribution bundle (javascript-obfuscator
// "string array" pattern). The encoded array below is served through the rotated
// accessor `_0x5bf3` and consumed by the string decoder `_0x2c37`, both declared
// earlier in this file (outside this view). Entries are encoded lookup strings --
// do not edit them by hand; regenerate from the unobfuscated source instead.
_0x2fd1c8=['ihnLBgu','vwX0CMe','z2vTAw4','odi2mhDkC0fczq','yxjKihy','qvbjx0S','BfrPzxi','q29ZDc0','DgLVBIa','ksSPkYK','lwHPz2G','u3rHBMq','AxnbCNi','w01Vzgu','C2vSzwm','y2fS','BwLUAq','z3jVCq','ndC3mZy0AfDlrLjQ','BMnLzca','y3rPBMC','kcGOlIS','DgvKlca','y2vK','t1bftKe','DgLTzxm','BhqSihm','ExbLigq','yxbWBhK','zwn0Aw4','CxvHBgK','BM9YBwe','ieDYB3e','AgLNAa','igjHC2u','DMLZAw8','zwqSihm','DgL2zsa','DMfSDwe','C2XPy2u','Dc1LDMe','DgLUzYa','DgLLCG','zxrLy3q','zw5bsq','rMfZDca','BgvUz3q','Dg9Yxsa','DwX0CMe','yMvZDa','sgLNAc0','CML0Awm','ieDLBwK','u2vSzwm','Bgf1zgu','qu5usfi','y2XHDwq','Dg8Gz2u','zw52','y2fSigu','ntu1odu2qNDTrxzW','B3bLBMe','DgvKia','rgvMyxu','zMfZDca','BMCGr2u','r1jpuv8','BwvKAxu','DgfTCa','zcWGC2u','ode1EKzxAMfZ','y2fSihq','BgL0EsW','DhKGCMu','BgvJDgK','sv9bueK','Dg9tDhi','Aw5NieC','C2vHCMm','zMfZDa','ohHbCKTkqW','zwXLy3q','Bg93','nZuYmZKXzefAt2TI','mtuXnZa3mgP4v0H5yG','lcbZzwW','zcbXDwe','x0Tfwq','lwzHC3q','Aw5N','igjHBge','C2vUC2K','mtK5nJm3mwjYtfLKEa','y29UC3q','mJaZmurzDwzWBW','r0vnsu4'];_0x5bf3=function(){return _0x2fd1c8;};return _0x5bf3();}
// Logger helpers used by the selection functions below.
import{log,warn}from'./logger.mjs';
// selectModelTier({ frequency, criticality, costSensitive, qualityRequired, testType, temporalNotes })
// Returns a model-tier string ('fast' / 'best' / 'balan'+…(ced) per the readable fragments).
// From the partial plaintext: when `frequency` is absent, it is derived from the last
// 10 (-0xa) entries of `temporalNotes` as events/sec (0x3e8 = 1000 ms): > 10 -> high,
// > 1 -> 'mediu'+'m', else low; a numeric `frequency` >= 10 (0xa) is treated as 'high'.
// High frequency and `costSensitive === true` (!![]) select 'fast'; criticality /
// qualityRequired and test types such as 'medic'+'al' or 'acces'+'sibil'+'ity-c'+…
// select 'best'; otherwise the balanced default is logged and returned.
// NOTE(review): most strings are decoded through the `_0x2c37` table -- behavior
// above is inferred from partial plaintext; confirm against the unobfuscated source.
export function selectModelTier(_0x369a53={}){const _0x31139e=_0x2c37,{frequency:_0x51fd27,criticality:_0x50bacb,costSensitive:_0x4b0654,qualityRequired:_0x1cb3c2,testType:_0x33221e,temporalNotes:_0x946df8}=_0x369a53;let _0x219de3=_0x51fd27;if(!_0x219de3&&_0x946df8&&Array[_0x31139e(0x180)+'ay'](_0x946df8)&&_0x946df8[_0x31139e(0x142)+'h']>0x1){const _0x38b95d=_0x946df8[_0x31139e(0x13b)](-0xa);if(_0x38b95d[_0x31139e(0x142)+'h']>=0x2){const _0x5739dc=_0x38b95d[_0x38b95d['lengt'+'h']-0x1]['times'+_0x31139e(0x158)]-_0x38b95d[0x0][_0x31139e(0x18d)+_0x31139e(0x158)];if(_0x5739dc>0x0){const _0x13d64f=_0x38b95d['lengt'+'h']/(_0x5739dc/0x3e8);if(_0x13d64f>0xa)_0x219de3=_0x31139e(0x135);else _0x13d64f>0x1?_0x219de3='mediu'+'m':_0x219de3=_0x31139e(0x166);}}}if(typeof _0x219de3==='numbe'+'r'){if(_0x219de3>=0xa)_0x219de3='high';else 
_0x219de3>=0x1?_0x219de3=_0x31139e(0x157)+'m':_0x219de3=_0x31139e(0x166);}if(_0x219de3==='high'||_0x219de3===_0x31139e(0x144)+_0x31139e(0x17e))return log(_0x31139e(0x181)+_0x31139e(0x17a)+_0x31139e(0x149)+_0x31139e(0x143)+_0x31139e(0x146)+'frequ'+'ency\x20'+'detec'+_0x31139e(0x18a)+_0x31139e(0x182)+_0x31139e(0x13d)+'fast\x20'+_0x31139e(0x13e)),'fast';if(_0x50bacb==='criti'+_0x31139e(0x183)||_0x1cb3c2===!![])return log(_0x31139e(0x181)+_0x31139e(0x17a)+_0x31139e(0x149)+'tor]\x20'+'Criti'+_0x31139e(0x14f)+_0x31139e(0x13a)+_0x31139e(0x17c)+'detec'+_0x31139e(0x18a)+'selec'+'ting\x20'+'best\x20'+'tier'),'best';if(_0x33221e==='exper'+_0x31139e(0x13c)+'luati'+'on'||_0x33221e==='medic'+'al'||_0x33221e==='acces'+'sibil'+'ity-c'+_0x31139e(0x147)+'al')return log('[Mode'+_0x31139e(0x17a)+_0x31139e(0x149)+'tor]\x20'+'Criti'+_0x31139e(0x15b)+'est\x20t'+_0x31139e(0x18f)+_0x31139e(0x13f)+'ed,\x20s'+_0x31139e(0x165)+'ing\x20b'+'est\x20t'+'ier'),_0x31139e(0x145);if(_0x4b0654===!![])return log('[Mode'+_0x31139e(0x17a)+_0x31139e(0x149)+_0x31139e(0x143)+'Cost-'+'sensi'+_0x31139e(0x139)+'detec'+'ted,\x20'+'selec'+_0x31139e(0x13d)+_0x31139e(0x154)+'tier'),'fast';return log('[Mode'+'lTier'+'Selec'+_0x31139e(0x143)+_0x31139e(0x17f)+_0x31139e(0x178)+'alida'+'tion,'+'\x20sele'+'cting'+_0x31139e(0x16e)+_0x31139e(0x187)+'tier\x20'+'(defa'+'ult)'),'balan'+_0x31139e(0x18b);}
// selectProvider(requirements = { speed, quality = 'good', costSensitive = false (![]),
//                                 contextSize = 0, vision = true (!![]), env = {} })
// Chooses a provider id based on which API keys exist in `env` (GROQ_…, GEMINI_…,
// OPENAI_…, and an '…OPIC_' key -- presumably ANTHROPIC_API_KEY -> 'claud'+'e').
// Readable fragments show: fast text-only (no vision) prefers 'groq'; contexts
// larger than 0x30d40 (200,000) prefer Gemini; 'best' quality prefers Gemini then
// OpenAI; cost-sensitive prefers Gemini, or Groq for text-only; with no keys at
// all it warns and falls back to Gemini. NOTE(review): inferred from partial
// plaintext fragments; confirm against the unobfuscated source.
export function selectProvider(requirements={}){const _0x5774a3=_0x2c37,{speed:speed=_0x5774a3(0x133)+'l',quality:quality='good',costSensitive:costSensitive=![],contextSize:contextSize=0x0,vision:vision=!![],env:env={}}=requirements;if(speed===_0x5774a3(0x144)+_0x5774a3(0x16c)&&!vision){if(env[_0x5774a3(0x156)+_0x5774a3(0x179)+'EY'])return log(_0x5774a3(0x181)+_0x5774a3(0x17a)+'Selec'+_0x5774a3(0x143)+_0x5774a3(0x175)+_0x5774a3(0x16c)+'\x20text'+'-only'+_0x5774a3(0x169)+_0x5774a3(0x191)+'g\x20Gro'+'q'),'groq';}if(contextSize>0x30d40){if(env['GEMIN'+'I_API'+'_KEY'])return 
// NOTE(review): the line break after `return` above -- if literally present in the
// shipped file, ASI would terminate the return early; presumably a wrap artifact
// of this view. Confirm against the published tarball.
log('[Mode'+_0x5774a3(0x17a)+_0x5774a3(0x149)+_0x5774a3(0x143)+'Large'+'\x20cont'+'ext\x20d'+_0x5774a3(0x13f)+_0x5774a3(0x138)+_0x5774a3(0x165)+'ing\x20G'+'emini'),'gemin'+'i';}if(quality===_0x5774a3(0x145)){if(env['GEMIN'+'I_API'+_0x5774a3(0x16b)])return log('[Mode'+'lTier'+_0x5774a3(0x149)+_0x5774a3(0x143)+'Best\x20'+_0x5774a3(0x132)+'ty\x20re'+'quire'+'d,\x20se'+'lecti'+_0x5774a3(0x155)+_0x5774a3(0x184)),_0x5774a3(0x176)+'i';if(env[_0x5774a3(0x18c)+_0x5774a3(0x15f)+_0x5774a3(0x16b)])return log('[Mode'+'lTier'+'Selec'+_0x5774a3(0x143)+'Best\x20'+'quali'+_0x5774a3(0x15d)+'quire'+_0x5774a3(0x159)+_0x5774a3(0x15e)+'ng\x20Op'+_0x5774a3(0x140)),_0x5774a3(0x151)+'i';}if(speed===_0x5774a3(0x163)&&quality==='good'){if(env[_0x5774a3(0x173)+'I_API'+_0x5774a3(0x16b)])return log('[Mode'+_0x5774a3(0x17a)+'Selec'+_0x5774a3(0x143)+_0x5774a3(0x141)+'+\x20goo'+_0x5774a3(0x16a)+_0x5774a3(0x15c)+_0x5774a3(0x174)+_0x5774a3(0x188)+_0x5774a3(0x148)+'ni'),_0x5774a3(0x176)+'i';}if(costSensitive){if(env['GEMIN'+_0x5774a3(0x15f)+'_KEY'])return log('[Mode'+_0x5774a3(0x17a)+'Selec'+'tor]\x20'+_0x5774a3(0x17b)+_0x5774a3(0x16f)+'tive,'+'\x20sele'+'cting'+_0x5774a3(0x148)+'ni'),_0x5774a3(0x176)+'i';if(env['GROQ_'+_0x5774a3(0x179)+'EY']&&!vision)return log('[Mode'+_0x5774a3(0x17a)+_0x5774a3(0x149)+'tor]\x20'+'Cost-'+'sensi'+'tive\x20'+'text-'+'only,'+_0x5774a3(0x174)+'cting'+_0x5774a3(0x134)),_0x5774a3(0x185);}if(vision&&env['GROQ_'+_0x5774a3(0x179)+'EY'])return log('[Mode'+'lTier'+'Selec'+'tor]\x20'+'Defau'+'lt,\x20s'+'elect'+_0x5774a3(0x161)+'roq\x20('+_0x5774a3(0x137)+'n\x20sup'+'porte'+'d)'),'groq';if(env['GEMIN'+_0x5774a3(0x15f)+_0x5774a3(0x16b)])return log(_0x5774a3(0x181)+'lTier'+_0x5774a3(0x149)+_0x5774a3(0x143)+_0x5774a3(0x153)+'lt,\x20s'+_0x5774a3(0x165)+'ing\x20G'+'emini'),_0x5774a3(0x176)+'i';if(env['OPENA'+'I_API'+'_KEY'])return 
log(_0x5774a3(0x181)+_0x5774a3(0x17a)+_0x5774a3(0x149)+'tor]\x20'+_0x5774a3(0x153)+_0x5774a3(0x18e)+_0x5774a3(0x165)+'ing\x20O'+'penAI'),'opena'+'i';if(env[_0x5774a3(0x14b)+'OPIC_'+_0x5774a3(0x179)+'EY'])return log('[Mode'+'lTier'+_0x5774a3(0x149)+'tor]\x20'+_0x5774a3(0x153)+_0x5774a3(0x18e)+_0x5774a3(0x165)+'ing\x20C'+_0x5774a3(0x14a)),_0x5774a3(0x14c)+'e';return warn('[Mode'+_0x5774a3(0x17a)+_0x5774a3(0x149)+_0x5774a3(0x143)+'No\x20AP'+'I\x20key'+'s\x20fou'+'nd,\x20d'+'efaul'+_0x5774a3(0x13d)+_0x5774a3(0x14d)+'mini'),_0x5774a3(0x176)+'i';}
// selectModelTierAndProvider(opts = { requirements, ...tierOptions })
// Combines selectModelTier(tierOptions) with selectProvider({ ...requirements,
// env: process[...] }) -- the process property is decoded at runtime, presumably
// process.env; confirm. Returns an object with 'tier', 'provi'+'der' and
// 'reaso'+'n' keys (the reason string is assembled from decoded fragments).
export function selectModelTierAndProvider(_0x5e8f3e={}){const _0x50d3f2=_0x2c37,{requirements:requirements={},..._0x31e560}=_0x5e8f3e,_0x5e580f=selectModelTier(_0x31e560),_0x5178d5={...requirements};_0x5178d5['env']=process[_0x50d3f2(0x14e)];const _0x2795ce=selectProvider(_0x5178d5),_0xef9135={};return _0xef9135['tier']=_0x5e580f,_0xef9135['provi'+'der']=_0x2795ce,_0xef9135['reaso'+'n']='Selec'+_0x50d3f2(0x152)+_0x2795ce+'\x20'+_0x5e580f+('\x20tier'+_0x50d3f2(0x136)+'d\x20on\x20'+'conte'+'xt'),_0xef9135;}
package/src/rubrics.mjs CHANGED
@@ -72,12 +72,19 @@ export const DEFAULT_RUBRIC = {
72
72
 
73
73
  /**
74
74
  * Build rubric prompt section
75
- *
75
+ *
76
76
  * @param {import('./index.mjs').Rubric | null} [rubric=null] - Rubric to use, or null for default
77
77
  * @param {boolean} [includeDimensions=true] - Whether to include evaluation dimensions
78
+ * @param {{ referenceImages?: Record<number, string> }} [options={}] - Options
79
+ * referenceImages: map of score level -> image path for visual anchoring.
80
+ * When provided, the rubric prompt instructs the VLM to compare against
81
+ * reference images for each score level (Prometheus-Vision, arXiv:2401.06591).
82
+ * The caller is responsible for encoding and attaching images to the API call;
83
+ * this function only generates the text prompt referencing them.
78
84
  * @returns {string} Formatted rubric prompt text
79
85
  */
80
- export function buildRubricPrompt(rubric = null, includeDimensions = true) {
86
+ export function buildRubricPrompt(rubric = null, includeDimensions = true, options = {}) {
87
+ const { referenceImages = null } = options;
81
88
  const rubricToUse = rubric || DEFAULT_RUBRIC;
82
89
  let prompt = `## EVALUATION RUBRIC
83
90
 
@@ -114,6 +121,19 @@ JSON: {"score": 3, "assessment": "fail", "issues": ["broken layout", "critical c
114
121
  7. List specific issues found (if any)
115
122
  8. Provide reasoning for your score`;
116
123
 
124
+ // Visual anchoring: reference images for score levels (Prometheus-Vision, arXiv:2401.06591)
125
+ if (referenceImages && typeof referenceImages === 'object') {
126
+ const levels = Object.keys(referenceImages).map(Number).sort((a, b) => b - a);
127
+ if (levels.length > 0) {
128
+ prompt += `\n\n### Visual Reference Anchors:
129
+ The following reference images are provided as calibration anchors for specific score levels.
130
+ Compare the screenshot being evaluated against these references to calibrate your scoring.
131
+ ${levels.map(level => `- **Score ${level}**: See reference image labeled "REF_SCORE_${level}"`).join('\n')}
132
+
133
+ Use these references to anchor your absolute scores. A screenshot similar in quality to REF_SCORE_9 should score around 9, etc.`;
134
+ }
135
+ }
136
+
117
137
  if (includeDimensions && rubricToUse.dimensions) {
118
138
  prompt += `\n\n### Evaluation Dimensions:
119
139
  ${Object.entries(rubricToUse.dimensions)
@@ -0,0 +1,177 @@
1
+ /**
2
+ * Score Calibration
3
+ *
4
+ * Adjusts raw VLM scores to reduce provider-specific bias.
5
+ * Research shows each VLM has a stable "evaluative fingerprint" --
6
+ * systematic scoring tendencies that differ across providers
7
+ * (Evaluative Fingerprints, arXiv:2601.05114).
8
+ *
9
+ * Supports:
10
+ * - Per-provider linear calibration (offset + scale)
11
+ * - User-supplied calibration profiles
12
+ * - Score histogram analysis for drift detection
13
+ */
14
+
15
+ import { warn } from './logger.mjs';
16
+ import { ValidationError } from './errors.mjs';
17
+
18
+ /**
19
+ * Default calibration profiles per provider.
20
+ *
21
+ * These are initial estimates based on observed tendencies.
22
+ * Users should override with their own profiles via calibrate()
23
+ * after running createCalibrationSuite().
24
+ *
25
+ * Format: { offset, scale } where calibrated = (raw + offset) * scale
26
+ * Then clamped to [0, 10].
27
+ */
28
+ const DEFAULT_PROFILES = {
29
+ gemini: { offset: 0, scale: 1.0 },
30
+ openai: { offset: 0, scale: 1.0 },
31
+ claude: { offset: 0, scale: 1.0 },
32
+ groq: { offset: 0, scale: 1.0 },
33
+ openrouter: { offset: 0, scale: 1.0 }
34
+ };
35
+
36
+ // User-supplied profiles override defaults
37
+ let userProfiles = {};
38
+
39
+ /**
40
+ * Set calibration profile for a provider
41
+ *
42
+ * @param {string} provider - Provider name
43
+ * @param {{ offset: number, scale: number }} profile - Calibration profile
44
+ */
45
+ export function setCalibrationProfile(provider, profile) {
46
+ if (typeof profile.offset !== 'number' || typeof profile.scale !== 'number') {
47
+ throw new ValidationError('Calibration profile must have numeric offset and scale', { offset: typeof profile.offset, scale: typeof profile.scale });
48
+ }
49
+ if (profile.scale <= 0) {
50
+ throw new ValidationError('Calibration scale must be positive', { scale: profile.scale });
51
+ }
52
+ userProfiles[provider] = { ...profile };
53
+ }
54
+
55
+ /**
56
+ * Get calibration profile for a provider
57
+ *
58
+ * @param {string} provider - Provider name
59
+ * @returns {{ offset: number, scale: number }} Calibration profile
60
+ */
61
+ export function getCalibrationProfile(provider) {
62
+ return userProfiles[provider] || DEFAULT_PROFILES[provider] || { offset: 0, scale: 1.0 };
63
+ }
64
+
65
+ /**
66
+ * Reset all calibration profiles to defaults
67
+ */
68
+ export function resetCalibrationProfiles() {
69
+ userProfiles = {};
70
+ }
71
+
72
+ /**
73
+ * Calibrate a raw score using the provider's profile
74
+ *
75
+ * @param {number | null} score - Raw score from VLM (0-10)
76
+ * @param {string} provider - Provider name
77
+ * @returns {number | null} Calibrated score (0-10), or null if input is null
78
+ */
79
+ export function calibrateScore(score, provider) {
80
+ if (score === null || score === undefined) {
81
+ return null;
82
+ }
83
+
84
+ const profile = getCalibrationProfile(provider);
85
+ const calibrated = (score + profile.offset) * profile.scale;
86
+
87
+ // Clamp to [0, 10]
88
+ return Math.max(0, Math.min(10, Math.round(calibrated * 100) / 100));
89
+ }
90
+
91
+ /**
92
+ * Derive a calibration profile from labeled data
93
+ *
94
+ * Given pairs of (raw VLM score, expected score), computes the
95
+ * least-squares line expected = slope * raw + intercept, then converts it to
+ * the profile format calibrated = (raw + offset) * scale, i.e. scale = slope
+ * and offset = intercept / slope.
96
+ *
97
+ * @param {Array<{ raw: number, expected: number }>} pairs - Score pairs
98
+ * @returns {{ offset: number, scale: number, r2: number }} Calibration profile with fit quality
99
+ */
100
+ export function deriveCalibrationProfile(pairs) {
101
+ if (!Array.isArray(pairs) || pairs.length < 2) {
102
+ throw new ValidationError('Need at least 2 (raw, expected) pairs to derive calibration', { count: pairs?.length ?? 0 });
103
+ }
104
+
105
+ const n = pairs.length;
106
+ let sumX = 0, sumY = 0, sumXX = 0, sumXY = 0, sumYY = 0;
107
+
108
+ for (const { raw, expected } of pairs) {
109
+ sumX += raw;
110
+ sumY += expected;
111
+ sumXX += raw * raw;
112
+ sumXY += raw * expected;
113
+ sumYY += expected * expected;
114
+ }
115
+
116
+ const denom = n * sumXX - sumX * sumX;
117
+
118
+ if (Math.abs(denom) < 1e-10) {
119
+ warn('[Calibration] All raw scores are identical; cannot derive profile');
120
+ return { offset: 0, scale: 1.0, r2: 0 };
121
+ }
122
+
123
+ // Linear regression: expected = scale * raw + offset_intercept
124
+ // We want calibrated = (raw + offset) * scale, so:
125
+ // calibrated = raw * scale + offset * scale
126
+ // Matching: scale = slope, offset_intercept = offset * scale -> offset = intercept / scale
127
+ const slope = (n * sumXY - sumX * sumY) / denom;
128
+ const intercept = (sumY - slope * sumX) / n;
129
+
130
+ // Convert to our format: calibrated = (raw + offset) * scale
131
+ const scale = slope || 1.0;
132
+ const offset = scale !== 0 ? intercept / scale : 0;
133
+
134
+ // R-squared
135
+ const meanY = sumY / n;
136
+ const ssTot = sumYY - n * meanY * meanY;
137
+ const ssRes = pairs.reduce((sum, { raw, expected }) => {
138
+ const predicted = raw * slope + intercept;
139
+ return sum + (expected - predicted) ** 2;
140
+ }, 0);
141
+ const r2 = ssTot > 0 ? 1 - ssRes / ssTot : 0;
142
+
143
+ return { offset, scale, r2 };
144
+ }
145
+
146
+ /**
147
+ * Analyze score distribution for a provider to detect drift
148
+ *
149
+ * @param {number[]} scores - Array of scores from a single provider
150
+ * @returns {{ mean: number, stddev: number, skew: number, histogram: Record<number, number> }}
151
+ */
152
+ export function analyzeScoreDistribution(scores) {
153
+ if (!scores.length) {
154
+ return { mean: 0, stddev: 0, skew: 0, histogram: {} };
155
+ }
156
+
157
+ const n = scores.length;
158
+ const mean = scores.reduce((a, b) => a + b, 0) / n;
159
+
160
+ const variance = scores.reduce((sum, s) => sum + (s - mean) ** 2, 0) / n;
161
+ const stddev = Math.sqrt(variance);
162
+
163
+ // Skewness (Fisher's)
164
+ const skew = stddev > 0
165
+ ? scores.reduce((sum, s) => sum + ((s - mean) / stddev) ** 3, 0) / n
166
+ : 0;
167
+
168
+ // Histogram (integer buckets 0-10)
169
+ const histogram = {};
170
+ for (let i = 0; i <= 10; i++) histogram[i] = 0;
171
+ for (const s of scores) {
172
+ const bucket = Math.max(0, Math.min(10, Math.round(s)));
173
+ histogram[bucket]++;
174
+ }
175
+
176
+ return { mean, stddev, skew, histogram };
177
+ }