@arclabs561/ai-visual-test 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/.secretsignore.example +20 -0
  2. package/CHANGELOG.md +360 -0
  3. package/CONTRIBUTING.md +63 -0
  4. package/DEPLOYMENT.md +80 -0
  5. package/LICENSE +22 -0
  6. package/README.md +142 -0
  7. package/SECURITY.md +108 -0
  8. package/api/health.js +34 -0
  9. package/api/validate.js +252 -0
  10. package/index.d.ts +1221 -0
  11. package/package.json +112 -0
  12. package/public/index.html +149 -0
  13. package/src/batch-optimizer.mjs +451 -0
  14. package/src/bias-detector.mjs +370 -0
  15. package/src/bias-mitigation.mjs +233 -0
  16. package/src/cache.mjs +433 -0
  17. package/src/config.mjs +268 -0
  18. package/src/constants.mjs +80 -0
  19. package/src/context-compressor.mjs +350 -0
  20. package/src/convenience.mjs +617 -0
  21. package/src/cost-tracker.mjs +257 -0
  22. package/src/cross-modal-consistency.mjs +170 -0
  23. package/src/data-extractor.mjs +232 -0
  24. package/src/dynamic-few-shot.mjs +140 -0
  25. package/src/dynamic-prompts.mjs +361 -0
  26. package/src/ensemble/index.mjs +53 -0
  27. package/src/ensemble-judge.mjs +366 -0
  28. package/src/error-handler.mjs +67 -0
  29. package/src/errors.mjs +167 -0
  30. package/src/experience-propagation.mjs +128 -0
  31. package/src/experience-tracer.mjs +487 -0
  32. package/src/explanation-manager.mjs +299 -0
  33. package/src/feedback-aggregator.mjs +248 -0
  34. package/src/game-goal-prompts.mjs +478 -0
  35. package/src/game-player.mjs +548 -0
  36. package/src/hallucination-detector.mjs +155 -0
  37. package/src/helpers/playwright.mjs +80 -0
  38. package/src/human-validation-manager.mjs +516 -0
  39. package/src/index.mjs +364 -0
  40. package/src/judge.mjs +929 -0
  41. package/src/latency-aware-batch-optimizer.mjs +192 -0
  42. package/src/load-env.mjs +159 -0
  43. package/src/logger.mjs +55 -0
  44. package/src/metrics.mjs +187 -0
  45. package/src/model-tier-selector.mjs +221 -0
  46. package/src/multi-modal/index.mjs +36 -0
  47. package/src/multi-modal-fusion.mjs +190 -0
  48. package/src/multi-modal.mjs +524 -0
  49. package/src/natural-language-specs.mjs +1071 -0
  50. package/src/pair-comparison.mjs +277 -0
  51. package/src/persona/index.mjs +42 -0
  52. package/src/persona-enhanced.mjs +200 -0
  53. package/src/persona-experience.mjs +572 -0
  54. package/src/position-counterbalance.mjs +140 -0
  55. package/src/prompt-composer.mjs +375 -0
  56. package/src/render-change-detector.mjs +583 -0
  57. package/src/research-enhanced-validation.mjs +436 -0
  58. package/src/retry.mjs +152 -0
  59. package/src/rubrics.mjs +231 -0
  60. package/src/score-tracker.mjs +277 -0
  61. package/src/smart-validator.mjs +447 -0
  62. package/src/spec-config.mjs +106 -0
  63. package/src/spec-templates.mjs +347 -0
  64. package/src/specs/index.mjs +38 -0
  65. package/src/temporal/index.mjs +102 -0
  66. package/src/temporal-adaptive.mjs +163 -0
  67. package/src/temporal-batch-optimizer.mjs +222 -0
  68. package/src/temporal-constants.mjs +69 -0
  69. package/src/temporal-context.mjs +49 -0
  70. package/src/temporal-decision-manager.mjs +271 -0
  71. package/src/temporal-decision.mjs +669 -0
  72. package/src/temporal-errors.mjs +58 -0
  73. package/src/temporal-note-pruner.mjs +173 -0
  74. package/src/temporal-preprocessor.mjs +543 -0
  75. package/src/temporal-prompt-formatter.mjs +219 -0
  76. package/src/temporal-validation.mjs +159 -0
  77. package/src/temporal.mjs +415 -0
  78. package/src/type-guards.mjs +311 -0
  79. package/src/uncertainty-reducer.mjs +470 -0
  80. package/src/utils/index.mjs +175 -0
  81. package/src/validation-framework.mjs +321 -0
  82. package/src/validation-result-normalizer.mjs +64 -0
  83. package/src/validation.mjs +243 -0
  84. package/src/validators/accessibility-programmatic.mjs +345 -0
  85. package/src/validators/accessibility-validator.mjs +223 -0
  86. package/src/validators/batch-validator.mjs +143 -0
  87. package/src/validators/hybrid-validator.mjs +268 -0
  88. package/src/validators/index.mjs +34 -0
  89. package/src/validators/prompt-builder.mjs +218 -0
  90. package/src/validators/rubric.mjs +85 -0
  91. package/src/validators/state-programmatic.mjs +260 -0
  92. package/src/validators/state-validator.mjs +291 -0
  93. package/vercel.json +27 -0
package/src/cache.mjs ADDED
@@ -0,0 +1,433 @@
1
+ /**
2
+ * VLLM Cache
3
+ *
4
+ * Provides persistent caching for VLLM API calls to reduce costs and improve performance.
5
+ * Uses file-based storage for cache persistence across test runs.
6
+ *
7
+ * BUGS FIXED (2025-01):
8
+ * 1. Timestamp reset on save - was resetting ALL timestamps to `now`, breaking 7-day expiration
9
+ * 2. Cache key truncation - was truncating prompts/gameState, causing collisions
10
+ *
11
+ * ARCHITECTURE NOTES:
12
+ * - This is ONE of THREE cache systems in the codebase (see docs/CACHE_ARCHITECTURE_DEEP_DIVE.md)
13
+ * - File-based, persistent across runs (7-day TTL, LRU eviction)
14
+ * - Purpose: Long-term persistence of API responses across restarts
15
+ * - Why separate: Different persistence strategy (file vs memory), different lifetime (7 days vs process lifetime),
16
+ * different failure domain (disk errors don't affect in-memory batching), minimal data overlap (<5%)
17
+ * - No coordination with BatchOptimizer cache or TemporalPreprocessing cache (by design - they serve different purposes)
18
+ */
19
+
20
+ import { readFileSync, writeFileSync, existsSync, mkdirSync, renameSync, unlinkSync } from 'fs';
21
+ import { join, dirname, normalize, resolve } from 'path';
22
+ import { createHash } from 'crypto';
23
+ import { fileURLToPath } from 'url';
24
+ import { Mutex } from 'async-mutex';
25
+ import { CacheError, FileError } from './errors.mjs';
26
+ import { warn, log } from './logger.mjs';
27
+
28
+ const __filename = fileURLToPath(import.meta.url);
29
+ const __dirname = dirname(__filename);
30
+
31
+ import { CACHE_CONSTANTS } from './constants.mjs';
32
+
33
+ // Default cache directory (can be overridden)
34
+ let CACHE_DIR = null;
35
+ let CACHE_FILE = null;
36
+ const MAX_CACHE_AGE = CACHE_CONSTANTS.MAX_CACHE_AGE_MS;
37
+ const MAX_CACHE_SIZE = CACHE_CONSTANTS.MAX_CACHE_SIZE;
38
+ const MAX_CACHE_SIZE_BYTES = CACHE_CONSTANTS.MAX_CACHE_SIZE_BYTES;
39
+
40
+ // Cache instance
41
+ let cacheInstance = null;
42
+ // Cache write mutex to prevent race conditions (proper async mutex)
43
+ const cacheWriteMutex = new Mutex();
44
+ // VERIFIABLE: Track cache metrics to verify claims about atomic writes
45
+ // Initialize to empty object so metrics are always available (even before first save)
46
+ let cacheMetrics = { atomicWrites: 0, atomicWriteFailures: 0, tempFileCleanups: 0 };
47
+
48
/**
 * Initialize cache with directory
 *
 * @param {string | undefined} [cacheDir] - Cache directory path, or undefined for default
 * @returns {void}
 */
export function initCache(cacheDir) {
  if (cacheDir) {
    // SECURITY: resolve + normalize the caller-supplied path before use.
    // NOTE(review): resolve()/normalize() already collapse '..' segments, so
    // this includes-check mostly rejects odd directory NAMES containing '..'
    // rather than blocking traversal — confirm the intended guarantee.
    const resolvedDir = normalize(resolve(cacheDir));
    if (resolvedDir.includes('..')) {
      throw new CacheError('Invalid cache directory: path traversal detected', { cacheDir });
    }
    CACHE_DIR = resolvedDir;
  } else {
    // Default location: <repo-root>/test-results/vllm-cache, relative to this module
    CACHE_DIR = join(__dirname, '..', '..', '..', 'test-results', 'vllm-cache');
  }

  CACHE_FILE = join(CACHE_DIR, 'cache.json');

  // Make sure the directory tree exists before the first read/write
  if (!existsSync(CACHE_DIR)) {
    mkdirSync(CACHE_DIR, { recursive: true });
  }

  // Drop the in-memory singleton so the next getCache() reloads from the new location
  cacheInstance = null;
}
74
+
75
/**
 * Generate cache key from image path, prompt, and context
 *
 * Hashes the FULL prompt and game state — no truncation. (Truncating caused
 * distinct inputs sharing a prefix to collide on the same key; SHA-256
 * handles arbitrary length, so there is no reason to truncate.)
 *
 * BUG FIX: context fields now use `??` instead of `||` so falsy-but-valid
 * values (frame 0, score 0) are no longer collapsed into '' and keyed
 * identically to a missing value. Keys change for those inputs, which only
 * causes a one-time cache miss — safe.
 *
 * @param {string} imagePath - Path to image file
 * @param {string} prompt - Validation prompt
 * @param {import('./index.mjs').ValidationContext} [context={}] - Validation context
 * @returns {string} SHA-256 hex digest of the cache key
 */
export function generateCacheKey(imagePath, prompt, context = {}) {
  const keyData = {
    imagePath,
    prompt, // Full prompt, not truncated
    testType: context.testType ?? '',
    frame: context.frame ?? '', // ?? keeps frame 0 distinct from "no frame"
    score: context.score ?? '', // ?? keeps score 0 distinct from "no score"
    viewport: context.viewport ? JSON.stringify(context.viewport) : '',
    gameState: context.gameState ? JSON.stringify(context.gameState) : '' // Full game state, not truncated
  };

  return createHash('sha256').update(JSON.stringify(keyData)).digest('hex');
}
108
+
109
/**
 * Load cache from file
 *
 * Returns a Map of key -> result object. Entries older than MAX_CACHE_AGE
 * (measured against their ORIGINAL creation timestamp) are dropped at load.
 *
 * On-disk format: { key: { data: {...}, timestamp: number } }
 *  - `timestamp`: creation time of the entry, used for expiration
 *  - `data._lastAccessed`: last access time, used for LRU eviction
 *
 * The creation timestamp is re-attached to each loaded entry as
 * `_originalTimestamp` so later expiration checks and saves can use it.
 *
 * Any read/parse failure is logged and an empty Map is returned — a
 * missing or corrupt cache file must never crash the caller.
 */
function loadCache() {
  // No file yet (or cache never initialized): start empty
  if (!CACHE_FILE || !existsSync(CACHE_FILE)) {
    return new Map();
  }

  try {
    let cacheData;
    try {
      // NOTE: readFileSync errors are also caught here and reported as parse failures
      cacheData = JSON.parse(readFileSync(CACHE_FILE, 'utf8'));
    } catch (parseError) {
      // SECURITY: Handle malformed JSON gracefully to prevent DoS
      warn(`[VLLM Cache] Failed to parse cache file (corrupted?): ${parseError.message}`);
      // Recover by starting with empty cache
      return new Map();
    }
    const cache = new Map();
    const now = Date.now();

    // Filter out expired entries based on the ORIGINAL creation timestamp;
    // entries without a timestamp are dropped as well
    for (const [key, value] of Object.entries(cacheData)) {
      if (value.timestamp && (now - value.timestamp) < MAX_CACHE_AGE) {
        // Re-attach the creation timestamp as in-memory metadata
        const entry = {
          ...value.data,
          _originalTimestamp: value.timestamp // Preserve for expiration checks
        };
        cache.set(key, entry);
      }
    }

    return cache;
  } catch (error) {
    // Catches structural surprises from the loop above (e.g. non-object JSON)
    warn(`[VLLM Cache] Failed to load cache: ${error.message}`);
    return new Map();
  }
}
159
+
160
/**
 * Save cache to file with size limits and race condition protection.
 *
 * Serializes the in-memory Map to `{ key: { data, timestamp } }`, applying:
 *  - entry-count cap (MAX_CACHE_SIZE) via LRU: least-recently-accessed evicted
 *  - byte cap (MAX_CACHE_SIZE_BYTES), also evicting least-recently-accessed
 *  - atomic write (temp file + rename) to avoid a corrupt cache file on crash
 *
 * Two timestamps serve different purposes and are both preserved:
 *  - `timestamp` (creation): drives 7-day expiration; NEVER reset on save
 *  - `_lastAccessed`: drives LRU eviction
 *
 * Concurrency: guarded by an async mutex so only one save runs at a time.
 * Failures are logged (never thrown to callers) and temp files cleaned up.
 *
 * @param {Map<string, object>} cache - In-memory cache whose entries carry
 *   `_originalTimestamp` / `_lastAccessed` metadata
 * @returns {Promise<void>}
 */
async function saveCache(cache) {
  if (!CACHE_FILE) return; // Cache never initialized — nothing to persist

  // Serialize writers: only one save at a time, even across awaits
  const release = await cacheWriteMutex.acquire();

  try {
    const cacheData = {};
    const now = Date.now();
    let totalSize = 0;

    // Flatten the Map into sortable records, preserving creation timestamps
    const entries = Array.from(cache.entries())
      .map(([key, value]) => {
        // Preserve original creation time if present; entries without one are new
        const originalTimestamp = value._originalTimestamp || now;
        // Strip internal metadata before persisting the result payload
        const { _originalTimestamp, ...dataWithoutMetadata } = value;

        return {
          key,
          value: dataWithoutMetadata,
          timestamp: originalTimestamp, // Preserve original, don't reset!
          lastAccessed: value._lastAccessed || originalTimestamp
        };
      })
      .sort((a, b) => a.lastAccessed - b.lastAccessed); // oldest access first

    // Count cap: keep the MAX_CACHE_SIZE most recently accessed entries
    const entriesToKeep = entries.slice(-MAX_CACHE_SIZE);

    // Byte cap — BUG FIX: iterate newest-accessed first so that when the byte
    // budget runs out we drop the LEAST recently used entries. The previous
    // loop filled from the oldest side, evicting the most recent entries.
    for (let i = entriesToKeep.length - 1; i >= 0; i--) {
      const { key, value, timestamp } = entriesToKeep[i];
      const entry = {
        data: value,
        timestamp // Original timestamp preserved for expiration
      };
      const entrySize = JSON.stringify(entry).length;

      // Budget exhausted — everything older (less recently used) is evicted
      if (totalSize + entrySize > MAX_CACHE_SIZE_BYTES) {
        break;
      }

      cacheData[key] = entry;
      totalSize += entrySize;
    }

    // Mirror the persisted set back into the in-memory Map, restoring the
    // _originalTimestamp metadata used by expiration checks
    cache.clear();
    for (const [key, entry] of Object.entries(cacheData)) {
      cache.set(key, {
        ...entry.data,
        _originalTimestamp: entry.timestamp // Restore for expiration checks
      });
    }

    // ATOMIC WRITE: write to a temp file, then rename over the real file.
    // rename is atomic on most filesystems, so readers never see a torn file.
    // (No fsync: OS-buffer durability is an accepted trade-off for a cache.)
    const tempFile = CACHE_FILE + '.tmp';
    const writeStartTime = Date.now();
    let writeSucceeded = false;
    let renameSucceeded = false;

    try {
      writeFileSync(tempFile, JSON.stringify(cacheData, null, 2), 'utf8');
      writeSucceeded = true;
      renameSync(tempFile, CACHE_FILE); // Atomic operation on most filesystems
      renameSucceeded = true;
      const writeDuration = Date.now() - writeStartTime;

      // Verifiable metric (surfaced by getCacheStats)
      cacheMetrics.atomicWrites++;

      if (process.env.DEBUG_CACHE) {
        log(`[VLLM Cache] Atomic write completed in ${writeDuration}ms (${Object.keys(cacheData).length} entries)`);
      }
    } catch (writeOrRenameError) {
      // If the write landed but the rename failed, remove the orphan temp file
      if (writeSucceeded && !renameSucceeded) {
        try {
          if (existsSync(tempFile)) {
            unlinkSync(tempFile);
            cacheMetrics.tempFileCleanups++;
            if (process.env.DEBUG_CACHE) {
              log(`[VLLM Cache] Cleaned up temp file after renameSync failure`);
            }
          }
        } catch (cleanupError) {
          // Best-effort cleanup: log and continue to the outer handler
          warn(`[VLLM Cache] Failed to clean up temp file after rename failure: ${cleanupError.message}`);
        }
      }
      throw writeOrRenameError; // Let the outer catch record the failure
    }
  } catch (error) {
    cacheMetrics.atomicWriteFailures++; // Verifiable metric

    warn(`[VLLM Cache] Failed to save cache: ${error.message}`);
    // Clean up any leftover temp file from the failed attempt
    try {
      const tempFile = CACHE_FILE + '.tmp';
      if (existsSync(tempFile)) {
        unlinkSync(tempFile);
        cacheMetrics.tempFileCleanups++;
        if (process.env.DEBUG_CACHE) {
          log(`[VLLM Cache] Cleaned up temp file after failed atomic write`);
        }
      }
    } catch (cleanupError) {
      // Swallow: cleanup is best-effort and must not mask the original error
    }
  } finally {
    release(); // Always release the mutex, even on failure
  }
}
311
+
312
/**
 * Get the singleton in-memory cache Map, creating it on first use.
 * Falls back to the default cache directory when initCache() was never called.
 */
function getCache() {
  if (cacheInstance) {
    return cacheInstance;
  }
  if (!CACHE_DIR) {
    initCache(); // Lazy init with the default directory
  }
  cacheInstance = loadCache();
  return cacheInstance;
}
324
+
325
/**
 * Get cached result
 *
 * Looks the entry up by the same key scheme as setCached, removes it if it is
 * older than MAX_CACHE_AGE, and otherwise bumps its LRU access time.
 *
 * BUG FIX: expiration is now checked BEFORE `_lastAccessed` is bumped. The
 * old code set `_lastAccessed = Date.now()` first and then used it as the
 * fallback creation time, so entries missing `_originalTimestamp` computed an
 * age of 0 and could never expire.
 *
 * @param {string} imagePath - Path to image file
 * @param {string} prompt - Validation prompt
 * @param {import('./index.mjs').ValidationContext} [context={}] - Validation context
 * @returns {import('./index.mjs').ValidationResult | null} Cached result or null if not found
 */
export function getCached(imagePath, prompt, context = {}) {
  const cache = getCache();
  const key = generateCacheKey(imagePath, prompt, context);
  const cached = cache.get(key);

  if (!cached) {
    return null;
  }

  const now = Date.now();

  // Expiration uses the creation timestamp; fall back to the entry's PREVIOUS
  // access time (not the one we are about to set) when creation time is missing
  const originalTimestamp = cached._originalTimestamp ?? cached._lastAccessed ?? now;
  if (now - originalTimestamp > MAX_CACHE_AGE) {
    cache.delete(key); // Remove expired entry
    return null;
  }

  // Touch for LRU eviction — separate from the creation timestamp above
  cached._lastAccessed = now;
  return cached;
}
355
+
356
/**
 * Set cached result
 *
 * Stores the result under a key derived from (imagePath, prompt, context),
 * stamping LRU/expiration metadata, then persists asynchronously.
 *
 * @param {string} imagePath - Path to image file
 * @param {string} prompt - Validation prompt
 * @param {import('./index.mjs').ValidationContext} context - Validation context
 * @param {import('./index.mjs').ValidationResult} result - Validation result to cache
 * @returns {void}
 */
export function setCached(imagePath, prompt, context, result) {
  const store = getCache();
  const key = generateCacheKey(imagePath, prompt, context);
  const now = Date.now();

  // Keep the creation timestamp of an existing entry (drives 7-day expiration);
  // a brand-new entry is created "now". _lastAccessed always moves forward (LRU).
  const creationTime = store.get(key)?._originalTimestamp || now;

  store.set(key, {
    ...result,
    _lastAccessed: now, // Update access time
    _originalTimestamp: creationTime // Preserve creation time
  });

  // Persist asynchronously (saveCache enforces size limits + LRU eviction);
  // errors are logged and never affect the in-memory cache
  saveCache(store).catch((error) => {
    warn(`[VLLM Cache] Failed to save cache (non-blocking): ${error.message}`);
  });
}
392
+
393
/**
 * Clear cache
 *
 * Empties the in-memory cache and persists the empty state to disk.
 * The save is fire-and-forget: failures are logged, never thrown.
 *
 * @returns {void}
 */
export function clearCache() {
  const store = getCache();
  store.clear();

  const onSaveError = (error) => {
    warn(`[VLLM Cache] Failed to save cache after clear (non-blocking): ${error.message}`);
  };
  // Fire-and-forget persistence of the now-empty cache
  saveCache(store).catch(onSaveError);
}
406
+
407
/**
 * Get cache statistics
 *
 * Includes atomic-write counters (maintained by saveCache) so the
 * "atomic writes prevent corruption" claim is verifiable at runtime.
 *
 * @returns {import('./index.mjs').CacheStats} Cache statistics
 */
export function getCacheStats() {
  const { atomicWrites, atomicWriteFailures, tempFileCleanups } = cacheMetrics;
  const attempted = atomicWrites + atomicWriteFailures;

  return {
    size: getCache().size,
    maxAge: MAX_CACHE_AGE,
    cacheFile: CACHE_FILE,
    atomicWrites,
    atomicWriteFailures,
    tempFileCleanups,
    // Percentage of save attempts that completed atomically (100 when none attempted)
    atomicWriteSuccessRate: attempted > 0 ? (atomicWrites / attempted) * 100 : 100
  };
}
433
+
package/src/config.mjs ADDED
@@ -0,0 +1,268 @@
1
+ /**
2
+ * Configuration System
3
+ *
4
+ * Handles provider selection, API keys, and settings.
5
+ * Designed to be flexible and extensible.
6
+ */
7
+
8
+ import { ConfigError } from './errors.mjs';
9
+ import { loadEnv } from './load-env.mjs';
10
+ import { API_CONSTANTS } from './constants.mjs';
11
+
12
+ // Load .env file automatically on module load
13
+ loadEnv();
14
+
15
+ /**
16
+ * Model tiers for each provider
17
+ * Updated January 2025: Latest models - Gemini 2.5 Pro, GPT-5, Claude 4.5 Sonnet
18
+ *
19
+ * GROQ INTEGRATION (2025):
20
+ * - Groq added for high-frequency decisions (10-60Hz temporal decisions)
21
+ * - ~0.22s latency (vs 1-3s for other providers)
22
+ * - 185-276 tokens/sec throughput
23
+ * - OpenAI-compatible API
24
+ * - Cost-competitive, free tier available
25
+ * - Best for: Fast tier decisions, high-Hz temporal decisions, real-time applications
26
+ */
27
+ const MODEL_TIERS = {
28
+ gemini: {
29
+ fast: 'gemini-2.0-flash-exp', // Fast, outperforms 1.5 Pro (2x speed)
30
+ balanced: 'gemini-2.5-pro', // Best balance (2025 leader, released June 2025)
31
+ best: 'gemini-2.5-pro' // Best quality (top vision-language model, 1M+ context)
32
+ },
33
+ openai: {
34
+ fast: 'gpt-4o-mini', // Fast, cheaper
35
+ balanced: 'gpt-5', // Best balance (released August 2025, unified reasoning)
36
+ best: 'gpt-5' // Best quality (state-of-the-art multimodal, August 2025)
37
+ },
38
+ claude: {
39
+ fast: 'claude-3-5-haiku-20241022', // Fast, cheaper
40
+ balanced: 'claude-sonnet-4-5', // Best balance (released September 2025, enhanced vision)
41
+ best: 'claude-sonnet-4-5' // Best quality (latest flagship, September 2025)
42
+ },
43
+ groq: {
44
+ // NOTE: Groq vision support requires different model
45
+ // For vision: meta-llama/llama-4-scout-17b-16e-instruct (preview, supports vision)
46
+ // For text-only: llama-3.3-70b-versatile is fastest (~0.22s latency)
47
+ fast: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable, fastest Groq option
48
+ balanced: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable, balanced
49
+ best: 'meta-llama/llama-4-scout-17b-16e-instruct' // Vision-capable, best quality (preview)
50
+ // WARNING: Groq vision models are preview-only. Text-only: use llama-3.3-70b-versatile
51
+ }
52
+ };
53
+
54
/**
 * Default provider configurations.
 *
 * Fields:
 *  - apiUrl: REST base URL for the provider
 *  - model: default model (overridable via createConfig options / env vars)
 *  - freeTier: whether a free tier exists (per the author's notes)
 *  - pricing: { input, output } — presumably USD per 1M tokens (stated for
 *    groq; assumed for the others) — TODO confirm per provider
 *  - priority: auto-detect preference; LOWER number wins (see detectProvider's
 *    ascending sort), so groq (0) is preferred when its key is available
 */
const PROVIDER_CONFIGS = {
  gemini: {
    name: 'gemini',
    apiUrl: 'https://generativelanguage.googleapis.com/v1beta',
    model: 'gemini-2.5-pro', // Latest: Released June 2025, top vision-language model, 1M+ context
    freeTier: true,
    pricing: { input: 1.25, output: 5.00 }, // Updated pricing for 2.5 Pro
    priority: 1 // NOTE: LOWER number = preferred (the old "higher = preferred" comment was wrong)
  },
  openai: {
    name: 'openai',
    apiUrl: 'https://api.openai.com/v1',
    model: 'gpt-5', // Latest: Released August 2025, state-of-the-art multimodal
    freeTier: false,
    pricing: { input: 5.00, output: 15.00 }, // Updated pricing for gpt-5
    priority: 2
  },
  claude: {
    name: 'claude',
    apiUrl: 'https://api.anthropic.com/v1',
    model: 'claude-sonnet-4-5', // Latest: Released September 2025, enhanced vision capabilities
    freeTier: false,
    pricing: { input: 3.00, output: 15.00 }, // Updated pricing for 4.5
    priority: 3
  },
  groq: {
    name: 'groq',
    apiUrl: 'https://api.groq.com/openai/v1', // OpenAI-compatible endpoint
    model: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable (preview), ~0.22s latency
    freeTier: true, // Free tier available
    pricing: { input: 0.59, output: 0.79 }, // 2025 pricing: $0.59/$0.79 per 1M tokens (real-time API)
    priority: 0, // Most preferred (lowest number) — targets high-frequency decisions
    latency: 220, // ~0.22s latency in ms
    throughput: 200, // ~200 tokens/sec average
    visionSupported: true // llama-4-scout-17b-16e-instruct supports vision (preview)
    // Text-only alternative: llama-3.3-70b-versatile (faster, no vision)
  }
};
101
+
102
/**
 * Create configuration from environment or options
 *
 * Model resolution precedence (first match wins):
 *   1. options.model (explicit override)
 *   2. options.modelTier ('fast' | 'balanced' | 'best')
 *   3. env.VLM_MODEL_TIER (same tier names)
 *   4. env.VLM_MODEL (explicit model name)
 *   5. the provider's default model
 *
 * API-key handling: passing `apiKey: null`/`undefined` EXPLICITLY disables
 * the env lookup (the returned config has enabled=false); omitting apiKey
 * falls back to the provider-specific environment key.
 *
 * @param {import('./index.mjs').ConfigOptions} [options={}] - Configuration options
 * @returns {import('./index.mjs').Config} Configuration object
 */
export function createConfig(options = {}) {
  const {
    provider = null,
    apiKey = null,
    env = process.env,
    cacheDir = null,
    cacheEnabled = true,
    maxConcurrency = API_CONSTANTS.DEFAULT_MAX_CONCURRENCY,
    timeout = API_CONSTANTS.DEFAULT_TIMEOUT_MS,
    verbose = false,
    modelTier = null, // 'fast', 'balanced', 'best', or null for default
    model = null // Explicit model override
  } = options;

  // Auto-detect provider from the environment when not specified
  let selectedProvider = provider;
  if (!selectedProvider) {
    selectedProvider = detectProvider(env);
  }

  // Distinguish "apiKey omitted" from "apiKey explicitly null/undefined":
  // the latter deliberately bypasses the env lookup (useful for testing)
  const apiKeyExplicitlyProvided = 'apiKey' in options;
  let selectedApiKey;
  if (apiKeyExplicitlyProvided && (apiKey === null || apiKey === undefined)) {
    // Explicitly null/undefined — don't check env, use null
    selectedApiKey = null;
  } else {
    // NOTE: a falsy-but-present key (e.g. '') also falls through to env lookup
    selectedApiKey = apiKey || getApiKey(selectedProvider, env);
  }

  // Shallow-copy the provider template so callers can't mutate module state
  let providerConfig = { ...PROVIDER_CONFIGS[selectedProvider] || PROVIDER_CONFIGS.gemini };

  // Apply the model precedence documented in the JSDoc above
  if (model) {
    providerConfig.model = model;
  } else if (modelTier && MODEL_TIERS[selectedProvider] && MODEL_TIERS[selectedProvider][modelTier]) {
    // Tier-based model selection from options
    providerConfig.model = MODEL_TIERS[selectedProvider][modelTier];
  } else if (env.VLM_MODEL_TIER && MODEL_TIERS[selectedProvider] && MODEL_TIERS[selectedProvider][env.VLM_MODEL_TIER]) {
    // Tier-based model selection from the environment
    providerConfig.model = MODEL_TIERS[selectedProvider][env.VLM_MODEL_TIER];
  } else if (env.VLM_MODEL) {
    // Explicit model override from the environment
    providerConfig.model = env.VLM_MODEL;
  }

  return {
    provider: selectedProvider,
    apiKey: selectedApiKey,
    providerConfig,
    enabled: !!selectedApiKey, // disabled when no key could be resolved
    cache: {
      enabled: cacheEnabled,
      dir: cacheDir
    },
    performance: {
      maxConcurrency,
      timeout
    },
    debug: {
      verbose
    }
  };
}
175
+
176
/**
 * Detect provider from environment variables.
 *
 * Resolution order:
 *   1. explicit VLM_PROVIDER (case-insensitive) naming a known provider
 *   2. auto-detect from configured API keys, preferring the lowest
 *      `priority` number (groq = 0 is most preferred)
 *   3. default to 'gemini' when no key is configured
 */
function detectProvider(env) {
  const explicitProvider = env.VLM_PROVIDER?.trim().toLowerCase();
  if (explicitProvider && PROVIDER_CONFIGS[explicitProvider]) {
    return explicitProvider;
  }

  // A provider counts as "available" when any of these keys is set
  const hasKeyFor = (config) => {
    if (env[`${config.name.toUpperCase()}_API_KEY`]) {
      return true;
    }
    // Anthropic publishes keys as ANTHROPIC_API_KEY, not CLAUDE_API_KEY
    if (config.name === 'claude' && env.ANTHROPIC_API_KEY) {
      return true;
    }
    // Generic shared key enables every provider
    return !!env.API_KEY;
  };

  const candidates = Object.values(PROVIDER_CONFIGS)
    .filter(hasKeyFor)
    .sort((a, b) => a.priority - b.priority); // lower number = more preferred

  return candidates[0]?.name ?? 'gemini'; // Default to gemini
}
208
+
209
/**
 * Get API key for provider
 *
 * Resolution order:
 *   1. `<PROVIDER>_API_KEY` (e.g. GEMINI_API_KEY, GROQ_API_KEY)
 *   2. ANTHROPIC_API_KEY for the 'claude' provider (Anthropic's conventional name)
 *   3. Generic API_KEY
 *
 * CLEANUP: the old `provider === 'groq'` special case was unreachable dead
 * code — GROQ_API_KEY is already matched by the generic check in step 1.
 *
 * @param {string} provider - Provider name (e.g. 'gemini', 'claude', 'groq')
 * @param {Record<string, string | undefined>} env - Environment variables
 * @returns {string | null} API key, or null if none configured
 */
function getApiKey(provider, env) {
  // Provider-specific key wins (this covers GROQ_API_KEY, GEMINI_API_KEY, …)
  const providerKey = env[`${provider.toUpperCase()}_API_KEY`];
  if (providerKey) {
    return providerKey;
  }

  // Special case: Anthropic uses ANTHROPIC_API_KEY (not CLAUDE_API_KEY)
  if (provider === 'claude' && env.ANTHROPIC_API_KEY) {
    return env.ANTHROPIC_API_KEY;
  }

  // Fallback to generic API_KEY
  return env.API_KEY || null;
}
232
+
233
// Lazily-created module-level configuration singleton
let configInstance = null;

/**
 * Get current configuration (singleton)
 *
 * Builds the default configuration on first call and reuses it afterwards;
 * setConfig() replaces it.
 *
 * @returns {import('./index.mjs').Config} Current configuration
 */
export function getConfig() {
  if (!configInstance) {
    configInstance = createConfig();
  }
  return configInstance;
}

/**
 * Set configuration (useful for testing)
 *
 * @param {import('./index.mjs').Config} config - Configuration to set
 * @returns {void}
 */
export function setConfig(config) {
  configInstance = config;
}
256
+
257
/**
 * Get provider configuration
 *
 * @param {string | null} [providerName=null] - Provider name, or null to use default
 * @returns {import('./index.mjs').Config['providerConfig']} Provider configuration
 */
export function getProvider(providerName = null) {
  // getConfig() is called unconditionally so first-use initialization still
  // happens even when an explicit providerName is passed
  const config = getConfig();
  const name = providerName || config.provider;
  return PROVIDER_CONFIGS[name] || PROVIDER_CONFIGS.gemini;
}