npm - @arclabs561/ai-visual-test - Versions diffs - 0.5.1 - Mend

@arclabs561/ai-visual-test 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

package/.secretsignore.example +20 -0
package/CHANGELOG.md +360 -0
package/CONTRIBUTING.md +63 -0
package/DEPLOYMENT.md +80 -0
package/LICENSE +22 -0
package/README.md +142 -0
package/SECURITY.md +108 -0
package/api/health.js +34 -0
package/api/validate.js +252 -0
package/index.d.ts +1221 -0
package/package.json +112 -0
package/public/index.html +149 -0
package/src/batch-optimizer.mjs +451 -0
package/src/bias-detector.mjs +370 -0
package/src/bias-mitigation.mjs +233 -0
package/src/cache.mjs +433 -0
package/src/config.mjs +268 -0
package/src/constants.mjs +80 -0
package/src/context-compressor.mjs +350 -0
package/src/convenience.mjs +617 -0
package/src/cost-tracker.mjs +257 -0
package/src/cross-modal-consistency.mjs +170 -0
package/src/data-extractor.mjs +232 -0
package/src/dynamic-few-shot.mjs +140 -0
package/src/dynamic-prompts.mjs +361 -0
package/src/ensemble/index.mjs +53 -0
package/src/ensemble-judge.mjs +366 -0
package/src/error-handler.mjs +67 -0
package/src/errors.mjs +167 -0
package/src/experience-propagation.mjs +128 -0
package/src/experience-tracer.mjs +487 -0
package/src/explanation-manager.mjs +299 -0
package/src/feedback-aggregator.mjs +248 -0
package/src/game-goal-prompts.mjs +478 -0
package/src/game-player.mjs +548 -0
package/src/hallucination-detector.mjs +155 -0
package/src/helpers/playwright.mjs +80 -0
package/src/human-validation-manager.mjs +516 -0
package/src/index.mjs +364 -0
package/src/judge.mjs +929 -0
package/src/latency-aware-batch-optimizer.mjs +192 -0
package/src/load-env.mjs +159 -0
package/src/logger.mjs +55 -0
package/src/metrics.mjs +187 -0
package/src/model-tier-selector.mjs +221 -0
package/src/multi-modal/index.mjs +36 -0
package/src/multi-modal-fusion.mjs +190 -0
package/src/multi-modal.mjs +524 -0
package/src/natural-language-specs.mjs +1071 -0
package/src/pair-comparison.mjs +277 -0
package/src/persona/index.mjs +42 -0
package/src/persona-enhanced.mjs +200 -0
package/src/persona-experience.mjs +572 -0
package/src/position-counterbalance.mjs +140 -0
package/src/prompt-composer.mjs +375 -0
package/src/render-change-detector.mjs +583 -0
package/src/research-enhanced-validation.mjs +436 -0
package/src/retry.mjs +152 -0
package/src/rubrics.mjs +231 -0
package/src/score-tracker.mjs +277 -0
package/src/smart-validator.mjs +447 -0
package/src/spec-config.mjs +106 -0
package/src/spec-templates.mjs +347 -0
package/src/specs/index.mjs +38 -0
package/src/temporal/index.mjs +102 -0
package/src/temporal-adaptive.mjs +163 -0
package/src/temporal-batch-optimizer.mjs +222 -0
package/src/temporal-constants.mjs +69 -0
package/src/temporal-context.mjs +49 -0
package/src/temporal-decision-manager.mjs +271 -0
package/src/temporal-decision.mjs +669 -0
package/src/temporal-errors.mjs +58 -0
package/src/temporal-note-pruner.mjs +173 -0
package/src/temporal-preprocessor.mjs +543 -0
package/src/temporal-prompt-formatter.mjs +219 -0
package/src/temporal-validation.mjs +159 -0
package/src/temporal.mjs +415 -0
package/src/type-guards.mjs +311 -0
package/src/uncertainty-reducer.mjs +470 -0
package/src/utils/index.mjs +175 -0
package/src/validation-framework.mjs +321 -0
package/src/validation-result-normalizer.mjs +64 -0
package/src/validation.mjs +243 -0
package/src/validators/accessibility-programmatic.mjs +345 -0
package/src/validators/accessibility-validator.mjs +223 -0
package/src/validators/batch-validator.mjs +143 -0
package/src/validators/hybrid-validator.mjs +268 -0
package/src/validators/index.mjs +34 -0
package/src/validators/prompt-builder.mjs +218 -0
package/src/validators/rubric.mjs +85 -0
package/src/validators/state-programmatic.mjs +260 -0
package/src/validators/state-validator.mjs +291 -0
package/vercel.json +27 -0

package/src/model-tier-selector.mjs ADDED Viewed

@@ -0,0 +1,221 @@
+/**
+ * Model Tier Selector
+ *
+ * Automatically selects the best model tier based on context (frequency, criticality, cost).
+ * Similar pattern to smart-validator.mjs which auto-selects validator types.
+ *
+ * Design Philosophy:
+ * - High-frequency decisions (10-60Hz) → use 'fast' tier
+ * - Critical evaluations → use 'best' tier
+ * - Cost-sensitive → use 'fast' tier
+ * - Standard validations → use 'balanced' tier (default)
+ *
+ * This prevents the common mistake of using expensive models for high-frequency decisions.
+ */
+import { log, warn } from './logger.mjs';
/**
 * Select a model tier ('fast' | 'balanced' | 'best') from the validation context.
 *
 * Decision order (first match wins):
 *   1. High-frequency decisions            → 'fast'
 *   2. Critical / quality-required         → 'best'
 *   3. Critical test types                 → 'best'
 *   4. Cost-sensitive                      → 'fast'
 *   5. Everything else                     → 'balanced'
 *
 * When no explicit frequency is given, the rate is estimated from the last
 * (up to) ten temporal notes and classified with the same thresholds as a
 * numeric frequency (>= 10 Hz high, >= 1 Hz medium, otherwise low).
 *
 * @param {Object} [context] - Validation context (null/undefined behaves like {})
 * @param {string|number} [context.frequency] - Decision frequency ('high'|'medium'|'low' or Hz number).
 *   An explicit 0 is honoured as "low" and is not replaced by note-based detection.
 * @param {string} [context.criticality] - Criticality level ('critical'|'high'|'medium'|'low')
 * @param {boolean} [context.costSensitive] - Cost-sensitive operation
 * @param {boolean} [context.qualityRequired] - High quality required
 * @param {string} [context.testType] - Test type (may indicate criticality)
 * @param {Array<{timestamp: number}>} [context.temporalNotes] - Temporal notes (for frequency detection);
 *   timestamps are treated as milliseconds
 * @returns {string} Model tier ('fast'|'balanced'|'best')
 */
export function selectModelTier(context = {}) {
  const {
    frequency,
    criticality,
    costSensitive,
    qualityRequired,
    testType,
    temporalNotes
  } = context ?? {};

  // Only undefined/null/''/NaN count as "no frequency supplied"; a numeric 0
  // is a legitimate (low) frequency and must not trigger auto-detection.
  const hasExplicitFrequency = typeof frequency === 'number'
    ? !Number.isNaN(frequency)
    : Boolean(frequency);

  let detectedFrequency = frequency;

  // Estimate the frequency (in Hz) from the most recent temporal notes.
  if (!hasExplicitFrequency && Array.isArray(temporalNotes) && temporalNotes.length > 1) {
    const recentNotes = temporalNotes.slice(-10);
    const firstTimestamp = recentNotes[0]?.timestamp;
    const lastTimestamp = recentNotes[recentNotes.length - 1]?.timestamp;
    const timeSpan = lastTimestamp - firstTimestamp;
    // NaN (missing timestamps) and non-positive spans fail this check.
    if (timeSpan > 0) {
      // N notes delimit N-1 intervals; dividing N by the span would
      // overestimate the rate (2x for two notes).
      detectedFrequency = ((recentNotes.length - 1) * 1000) / timeSpan;
    }
  }

  // Convert numeric frequency (explicit or estimated) to a category
  if (typeof detectedFrequency === 'number') {
    if (detectedFrequency >= 10) {
      detectedFrequency = 'high'; // 10-60Hz
    } else if (detectedFrequency >= 1) {
      detectedFrequency = 'medium'; // 1-10Hz
    } else {
      detectedFrequency = 'low'; // <1Hz
    }
  }

  // Tier 1: High-frequency decisions (10-60Hz) → fast
  // Rationale: Speed is critical, quality can be lower
  if (detectedFrequency === 'high' || detectedFrequency === 'ultra-high') {
    log('[ModelTierSelector] High-frequency detected, selecting fast tier');
    return 'fast';
  }

  // Tier 2: Critical evaluations → best
  // Rationale: Quality is critical, speed can be slower
  if (criticality === 'critical' || qualityRequired === true) {
    log('[ModelTierSelector] Critical evaluation detected, selecting best tier');
    return 'best';
  }

  // Certain test types imply a critical evaluation
  if (testType === 'expert-evaluation' || testType === 'medical' || testType === 'accessibility-critical') {
    log('[ModelTierSelector] Critical test type detected, selecting best tier');
    return 'best';
  }

  // Tier 3: Cost-sensitive → fast
  // Rationale: Minimize cost, acceptable quality
  if (costSensitive === true) {
    log('[ModelTierSelector] Cost-sensitive detected, selecting fast tier');
    return 'fast';
  }

  // Tier 4: Standard validations → balanced (default)
  // Rationale: Best balance of speed and quality
  log('[ModelTierSelector] Standard validation, selecting balanced tier (default)');
  return 'balanced';
}
/**
 * Select a provider based on requirements and the API keys that are available.
 *
 * Rules are evaluated in order; a rule only fires when the key for the
 * provider it would pick is present in `env`, otherwise evaluation continues:
 *   1. Ultra-fast and text-only            → groq
 *   2. Context larger than 200000 tokens   → gemini
 *   3. Best quality                        → gemini, then openai
 *   4. Fast + good quality                 → gemini
 *   5. Cost-sensitive                      → gemini, then groq (text-only)
 *   6. Default by available key            → groq (when vision is requested),
 *                                            gemini, openai, claude,
 *                                            then groq for text-only requests
 *   7. No key at all                       → 'gemini' (a warning is emitted)
 *
 * @param {Object} [requirements] - Provider requirements (null/undefined behaves like {})
 * @param {string} [requirements.speed] - Speed requirement ('ultra-fast'|'fast'|'normal'|'slow')
 * @param {string} [requirements.quality] - Quality requirement ('best'|'good'|'acceptable')
 * @param {boolean} [requirements.costSensitive] - Cost-sensitive
 * @param {number} [requirements.contextSize] - Context size in tokens
 * @param {boolean} [requirements.vision] - Vision required (default: true for VLLM)
 * @param {Object} [requirements.env] - Environment variables (for API key detection)
 * @returns {string} Provider name ('gemini'|'openai'|'claude'|'groq')
 */
export function selectProvider(requirements = {}) {
  const {
    speed = 'normal',
    quality = 'good',
    costSensitive = false,
    contextSize = 0,
    vision = true, // Default true for VLLM
    env
  } = requirements ?? {};
  // Destructuring defaults do not cover an explicit `env: null`.
  const keys = env ?? {};

  // Ultra-fast, text-only → Groq (if no vision needed)
  if (speed === 'ultra-fast' && !vision && keys.GROQ_API_KEY) {
    log('[ModelTierSelector] Ultra-fast text-only, selecting Groq');
    return 'groq';
  }

  // Large context → Gemini (1M+ tokens)
  if (contextSize > 200000 && keys.GEMINI_API_KEY) {
    log('[ModelTierSelector] Large context detected, selecting Gemini');
    return 'gemini';
  }

  // Best quality → Gemini first, OpenAI as the alternative
  if (quality === 'best') {
    if (keys.GEMINI_API_KEY) {
      log('[ModelTierSelector] Best quality required, selecting Gemini');
      return 'gemini';
    }
    if (keys.OPENAI_API_KEY) {
      log('[ModelTierSelector] Best quality required, selecting OpenAI');
      return 'openai';
    }
  }

  // Fast + good quality → Gemini
  if (speed === 'fast' && quality === 'good' && keys.GEMINI_API_KEY) {
    log('[ModelTierSelector] Fast + good quality, selecting Gemini');
    return 'gemini';
  }

  // Cost-sensitive → Gemini, or Groq when vision is not needed
  if (costSensitive) {
    if (keys.GEMINI_API_KEY) {
      log('[ModelTierSelector] Cost-sensitive, selecting Gemini');
      return 'gemini';
    }
    if (keys.GROQ_API_KEY && !vision) {
      log('[ModelTierSelector] Cost-sensitive text-only, selecting Groq');
      return 'groq';
    }
  }

  // Default → Auto-detect from available API keys
  // Priority: Groq (when vision is requested) > Gemini > OpenAI > Claude
  if (vision && keys.GROQ_API_KEY) {
    log('[ModelTierSelector] Default, selecting Groq (vision supported)');
    return 'groq';
  }
  if (keys.GEMINI_API_KEY) {
    log('[ModelTierSelector] Default, selecting Gemini');
    return 'gemini';
  }
  if (keys.OPENAI_API_KEY) {
    log('[ModelTierSelector] Default, selecting OpenAI');
    return 'openai';
  }
  if (keys.ANTHROPIC_API_KEY) {
    log('[ModelTierSelector] Default, selecting Claude');
    return 'claude';
  }
  // Only reachable for text-only requests: a Groq key is still a usable
  // provider, so do not report "no API keys" and fall back to a provider
  // that has no key configured.
  if (keys.GROQ_API_KEY) {
    log('[ModelTierSelector] Default, selecting Groq (text-only)');
    return 'groq';
  }

  // Fallback
  warn('[ModelTierSelector] No API keys found, defaulting to gemini');
  return 'gemini';
}
/**
 * Select model tier and provider based on context
 *
 * Convenience wrapper: everything in `context` except `requirements` is
 * passed to selectModelTier(); `requirements` is passed to selectProvider().
 *
 * API keys are read from `requirements.env` when the caller supplies one,
 * otherwise from `process.env` (or an empty object when `process` is not
 * defined, e.g. outside Node).
 *
 * @param {Object} [context] - Validation context (null/undefined behaves like {})
 * @param {Object} [context.requirements] - Provider requirements
 * @param {Object} [context.requirements.env] - Environment variables to use instead of process.env
 * @returns {{tier: string, provider: string, reason: string}}
 */
export function selectModelTierAndProvider(context = {}) {
  const { requirements = {}, ...tierContext } = context ?? {};

  // Previously process.env unconditionally replaced a caller-supplied env,
  // which made the documented `env` requirement impossible to use here.
  const defaultEnv = typeof process !== 'undefined' && process.env ? process.env : {};
  const env = requirements?.env ?? defaultEnv;

  const tier = selectModelTier(tierContext);
  const provider = selectProvider({
    ...requirements,
    env
  });

  return {
    tier,
    provider,
    reason: `Selected ${provider} ${tier} tier based on context`
  };
}

package/src/multi-modal/index.mjs ADDED Viewed

@@ -0,0 +1,36 @@
+/**
+ * Multi-Modal Sub-Module
+ *
+ * Multi-modal validation features (screenshot + HTML + CSS + rendered code).
+ *
+ * Import from 'ai-visual-test/multi-modal'
+ */
+// Core multi-modal functions
+export {
+  multiModalValidation,
+  captureTemporalScreenshots,
+  extractRenderedCode,
+  multiPerspectiveEvaluation
+} from '../multi-modal.mjs';
+// Multi-modal fusion
+export {
+  buildStructuredFusionPrompt,
+  calculateModalityWeights,
+  compareFusionStrategies
+} from '../multi-modal-fusion.mjs';
+// Cross-modal consistency
+export {
+  checkCrossModalConsistency,
+  validateExperienceConsistency
+} from '../cross-modal-consistency.mjs';
+// Prompt composition
+export {
+  composeSingleImagePrompt,
+  composeComparisonPrompt,
+  composeMultiModalPrompt
+} from '../prompt-composer.mjs';

package/src/multi-modal-fusion.mjs ADDED Viewed

@@ -0,0 +1,190 @@
+/**
+ * Attention-Based Multi-Modal Fusion
+ *
+ * Implements structured fusion with attention mechanisms for combining
+ * screenshot, HTML, CSS, and rendered code modalities.
+ *
+ * Research:
+ * - "Multimodal Fusion and Vision-Language Models: A Survey for Robot Vision" - Comprehensive survey
+ * - "Cross-Modal Consistency in Multimodal Large Language Models" - Consistency issues in GPT-4V
+ * - "Post-pre-training for Modality Alignment in Vision-Language Foundation Models" - CLIP-Refine
+ * - "Attention-Based Multimodal Fusion" - Various papers on attention mechanisms
+ *
+ * Key findings: Structured fusion outperforms simple concatenation. Modality gap exists even
+ * after contrastive training. Cross-attention enables selective information integration.
+ * Hallucination is a major issue, especially with stylized images.
+ *
+ * Note: This implementation uses heuristic-based attention weighting. Full research implementation
+ * would use learned cross-attention mechanisms and address the modality gap.
+ */
/**
 * Calculate attention weights for different modalities
 *
 * Starts from a base distribution and replaces it with a keyword-specific
 * profile when the prompt mentions a matching topic. Profiles are checked in
 * a fixed order and each one overwrites all five weights, so when several
 * topics match, the LAST matching profile wins. The result is normalized so
 * the weights sum to 1.
 *
 * A missing or non-string prompt yields the base distribution.
 *
 * @param {Object} modalities - Available modalities (currently not used to adjust the weights)
 * @param {string} [modalities.screenshot] - Screenshot path
 * @param {Object} [modalities.renderedCode] - Rendered code (HTML, CSS, DOM)
 * @param {Object} [modalities.gameState] - Game state
 * @param {string} prompt - Validation prompt
 * @returns {{screenshot: number, html: number, css: number, dom: number, gameState: number}}
 *   Attention weights for each modality
 */
export function calculateModalityWeights(modalities, prompt) {
  const weights = {
    screenshot: 0.4, // Base weight for visual
    html: 0.2,
    css: 0.2,
    dom: 0.1,
    gameState: 0.1
  };

  // Ordered keyword profiles; later entries take precedence over earlier ones.
  const profiles = [
    {
      // Visual/design prompts → emphasize the screenshot
      keywords: ['visual', 'design', 'appearance'],
      weights: { screenshot: 0.5, html: 0.2, css: 0.2, dom: 0.05, gameState: 0.05 }
    },
    {
      // Structure/layout prompts → emphasize HTML and DOM
      keywords: ['structure', 'layout', 'html'],
      weights: { screenshot: 0.3, html: 0.3, css: 0.15, dom: 0.2, gameState: 0.05 }
    },
    {
      // Styling prompts → emphasize CSS
      keywords: ['style', 'css', 'styling'],
      weights: { screenshot: 0.35, html: 0.2, css: 0.3, dom: 0.1, gameState: 0.05 }
    },
    {
      // State/functionality prompts → emphasize game state
      keywords: ['state', 'function', 'game'],
      weights: { screenshot: 0.35, html: 0.2, css: 0.15, dom: 0.1, gameState: 0.2 }
    }
  ];

  // Guard against undefined/null/non-string prompts instead of throwing.
  const promptLower = typeof prompt === 'string' ? prompt.toLowerCase() : '';
  for (const profile of profiles) {
    if (profile.keywords.some((keyword) => promptLower.includes(keyword))) {
      // Assigning onto existing keys keeps the key order of the result stable.
      Object.assign(weights, profile.weights);
    }
  }

  // Normalize weights
  const total = Object.values(weights).reduce((a, b) => a + b, 0);
  for (const key of Object.keys(weights)) {
    weights[key] = weights[key] / total;
  }
  return weights;
}
/**
 * Build structured fusion prompt with attention weights
 *
 * Appends one labelled section per available modality to the base prompt,
 * each annotated with its relevance weight (as a whole-number percentage),
 * followed by fixed evaluation instructions. Sections for modalities that are
 * absent are omitted.
 *
 * HTML and critical CSS are truncated to 2000 characters and the DOM
 * structure to 1000 characters; game state is included in full. Non-string
 * HTML/CSS/DOM values are serialized as pretty-printed JSON.
 *
 * @param {string} basePrompt - Base validation prompt
 * @param {Object} [modalities] - Available modalities (null/undefined behaves like {})
 * @param {string} [modalities.screenshot] - Screenshot path
 * @param {Object} [modalities.renderedCode] - Rendered code
 * @param {string|Object} [modalities.renderedCode.html] - Rendered HTML
 * @param {string|Object} [modalities.renderedCode.criticalCSS] - Critical CSS
 * @param {string|Object} [modalities.renderedCode.domStructure] - DOM structure
 * @param {Object} [modalities.gameState] - Game state
 * @returns {string} Structured fusion prompt
 */
export function buildStructuredFusionPrompt(basePrompt, modalities) {
  const available = modalities ?? {};
  const { screenshot, renderedCode, gameState } = available;
  const weights = calculateModalityWeights(available, basePrompt);

  // Format a 0..1 weight as a whole-number percentage.
  const percent = (weight) => (weight * 100).toFixed(0);
  // Strings pass through; anything else is pretty-printed as JSON.
  // JSON.stringify returns undefined for unserializable values, hence the ?? ''.
  const toText = (value) =>
    (typeof value === 'string' ? value : JSON.stringify(value, null, 2) ?? '');

  const parts = [basePrompt];
  parts.push('\n\n=== MULTI-MODAL CONTEXT (Weighted by Relevance) ===\n');

  // Screenshot (primary reference for visual validation)
  if (screenshot) {
    parts.push(`[VISUAL - Weight: ${percent(weights.screenshot)}%]`);
    parts.push(`Screenshot: ${screenshot}`);
    parts.push('Use this visual representation as the primary reference for appearance and layout.\n');
  }

  // HTML structure
  if (renderedCode?.html) {
    parts.push(`[STRUCTURE - Weight: ${percent(weights.html)}%]`);
    parts.push('HTML Structure:');
    parts.push(toText(renderedCode.html).substring(0, 2000)); // Limit length
    parts.push('\nUse this for understanding semantic structure and element hierarchy.\n');
  }

  // CSS styling
  if (renderedCode?.criticalCSS) {
    parts.push(`[STYLING - Weight: ${percent(weights.css)}%]`);
    parts.push('Critical CSS:');
    parts.push(toText(renderedCode.criticalCSS).substring(0, 2000)); // Limit length
    parts.push('\nUse this for understanding visual styling, positioning, and layout rules.\n');
  }

  // DOM structure
  if (renderedCode?.domStructure) {
    parts.push(`[DOM - Weight: ${percent(weights.dom)}%]`);
    parts.push('DOM Structure:');
    parts.push(toText(renderedCode.domStructure).substring(0, 1000)); // Limit length
    parts.push('\nUse this for understanding element relationships and computed properties.\n');
  }

  // Game state (skipped when empty)
  if (gameState && Object.keys(gameState).length > 0) {
    parts.push(`[STATE - Weight: ${percent(weights.gameState)}%]`);
    parts.push('Game State:');
    parts.push(JSON.stringify(gameState, null, 2));
    parts.push('\nUse this for understanding functional state and dynamic behavior.\n');
  }

  parts.push('\n=== EVALUATION INSTRUCTIONS ===');
  parts.push('1. Primary: Use screenshot for visual assessment');
  parts.push('2. Secondary: Use HTML/CSS for structural validation');
  parts.push('3. Tertiary: Use DOM/State for functional validation');
  parts.push('4. Weight your assessment based on the relevance weights above');
  parts.push('5. Cross-reference modalities to identify inconsistencies');
  return parts.join('\n');
}
/**
 * Compare structured fusion vs simple concatenation
 *
 * Builds both prompt variants for the same inputs and reports their lengths.
 * `modalityCount` is the number of own enumerable keys on `modalities` and is
 * the same for both strategies.
 *
 * @param {string} basePrompt - Base prompt
 * @param {Object} [modalities] - Available modalities (null/undefined behaves like {})
 * @param {string} [modalities.screenshot] - Screenshot path
 * @param {Object} [modalities.renderedCode] - Rendered code
 * @param {Object} [modalities.gameState] - Game state
 * @returns {{
 *   simple: {length: number, modalityCount: number, hasWeights: boolean},
 *   structured: {length: number, modalityCount: number, hasWeights: boolean, weights: Object},
 *   recommendation: string
 * }} Comparison of fusion strategies
 */
export function compareFusionStrategies(basePrompt, modalities) {
  // Tolerate a missing modalities argument instead of throwing on property access.
  const available = modalities ?? {};
  const modalityCount = Object.keys(available).length;

  // Simple concatenation: every modality dumped verbatim, absent ones as N/A or {}
  const simplePrompt = [
    basePrompt,
    `SCREENSHOT:\n${available.screenshot || 'N/A'}`,
    `RENDERED CODE:\n${JSON.stringify(available.renderedCode || {}, null, 2)}`,
    `GAME STATE:\n${JSON.stringify(available.gameState || {}, null, 2)}`
  ].join('\n\n');

  // Structured fusion: weighted, labelled sections
  const structuredPrompt = buildStructuredFusionPrompt(basePrompt, available);

  return {
    simple: {
      length: simplePrompt.length,
      modalityCount,
      hasWeights: false
    },
    structured: {
      length: structuredPrompt.length,
      modalityCount,
      hasWeights: true,
      weights: calculateModalityWeights(available, basePrompt)
    },
    recommendation: 'Use structured fusion for better modality integration'
  };
}