npm - @arclabs561/ai-visual-test - Versions diffs - 0.5.1 → 0.7.4 - Mend

@arclabs561/ai-visual-test 0.5.1 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

package/CHANGELOG.md +127 -11
package/DEPLOYMENT.md +225 -9
package/README.md +71 -80
package/index.d.ts +902 -5
package/package.json +10 -51
package/src/batch-optimizer.mjs +39 -0
package/src/cache.mjs +241 -16
package/src/config.mjs +33 -91
package/src/constants.mjs +54 -0
package/src/convenience.mjs +113 -10
package/src/cost-optimization.mjs +1 -0
package/src/cost-tracker.mjs +134 -2
package/src/data-extractor.mjs +36 -7
package/src/dynamic-few-shot.mjs +69 -11
package/src/errors.mjs +6 -2
package/src/experience-propagation.mjs +12 -0
package/src/experience-tracer.mjs +12 -3
package/src/game-player.mjs +222 -43
package/src/graceful-shutdown.mjs +126 -0
package/src/helpers/playwright.mjs +22 -8
package/src/human-validation-manager.mjs +99 -2
package/src/index.mjs +48 -3
package/src/integrations/playwright.mjs +140 -0
package/src/judge.mjs +699 -24
package/src/load-env.mjs +2 -1
package/src/logger.mjs +31 -3
package/src/model-tier-selector.mjs +1 -221
package/src/natural-language-specs.mjs +31 -3
package/src/persona-enhanced.mjs +4 -2
package/src/persona-experience.mjs +1 -1
package/src/pricing.mjs +28 -0
package/src/prompt-composer.mjs +162 -5
package/src/provider-data.mjs +115 -0
package/src/render-change-detector.mjs +5 -0
package/src/research-enhanced-validation.mjs +7 -5
package/src/retry.mjs +21 -7
package/src/rubrics.mjs +4 -0
package/src/safe-logger.mjs +71 -0
package/src/session-cost-tracker.mjs +320 -0
package/src/smart-validator.mjs +8 -8
package/src/spec-templates.mjs +52 -6
package/src/startup-validation.mjs +127 -0
package/src/temporal-adaptive.mjs +2 -2
package/src/temporal-decision-manager.mjs +1 -271
package/src/temporal-logic.mjs +104 -0
package/src/temporal-note-pruner.mjs +119 -0
package/src/temporal-preprocessor.mjs +1 -543
package/src/temporal.mjs +681 -79
package/src/utils/action-hallucination-detector.mjs +301 -0
package/src/utils/baseline-validator.mjs +82 -0
package/src/utils/cache-stats.mjs +104 -0
package/src/utils/cached-llm.mjs +164 -0
package/src/utils/capability-stratifier.mjs +108 -0
package/src/utils/counterfactual-tester.mjs +83 -0
package/src/utils/error-recovery.mjs +117 -0
package/src/utils/explainability-scorer.mjs +119 -0
package/src/utils/exploratory-automation.mjs +131 -0
package/src/utils/index.mjs +10 -0
package/src/utils/intent-recognizer.mjs +201 -0
package/src/utils/log-sanitizer.mjs +165 -0
package/src/utils/path-validator.mjs +88 -0
package/src/utils/performance-logger.mjs +316 -0
package/src/utils/performance-measurement.mjs +280 -0
package/src/utils/prompt-sanitizer.mjs +213 -0
package/src/utils/rate-limiter.mjs +144 -0
package/src/validation-framework.mjs +24 -20
package/src/validation-result-normalizer.mjs +35 -1
package/src/validation.mjs +75 -25
package/src/validators/accessibility-validator.mjs +144 -0
package/src/validators/hybrid-validator.mjs +48 -4
package/api/health.js +0 -34
package/api/validate.js +0 -252
package/public/index.html +0 -149
package/vercel.json +0 -27

package/src/provider-data.mjs ADDED Viewed

@@ -0,0 +1,115 @@
+/**
+ * Provider Data & Pricing Configuration
+ *
+ * Central source of truth for provider configuration, models, and pricing.
+ * Extracted from config.mjs to facilitate updates and prevent drift.
+ *
+ * ⚠️ IMPORTANT: Model names should be verified against current provider documentation.
+ * Some models may be preview-only, deprecated, or have different names in production.
+ */
+/**
+ * Model tiers for each provider
+ *
+ * Structure: provider name -> tier ('fast' | 'balanced' | 'best') -> model
+ * identifier string. A provider may map several tiers to the same model
+ * (gemini uses one model for 'fast' and 'balanced'; groq uses one model for
+ * all three tiers).
+ *
+ * Models can be overridden via environment variables:
+ * - VLM_MODEL_TIER: 'fast' | 'balanced' | 'best'
+ * - VLM_MODEL: explicit model name override
+ *
+ * GROQ INTEGRATION:
+ * - Groq added for high-frequency decisions (10-60Hz temporal decisions)
+ * - ~0.22s latency (vs 1-3s for other providers)
+ * - 185-276 tokens/sec throughput
+ * - OpenAI-compatible API
+ * - Cost-competitive, free tier available
+ * - Useful for: Fast tier decisions, high-Hz temporal decisions, real-time applications
+ */
+export const MODEL_TIERS = {
+  gemini: {
+    fast: 'gemini-2.5-flash',          // Fast, cost-effective (stable)
+    balanced: 'gemini-2.5-flash',      // Good balance (using Flash as default balanced too)
+    best: 'gemini-3-pro-preview'       // High quality (preview)
+  },
+  openai: {
+    fast: 'gpt-4o-mini',               // Fast, cheaper
+    balanced: 'gpt-4o',                // Balanced (current production)
+    best: 'gpt-5'                      // High quality (late 2025, latest production)
+  },
+  claude: {
+    fast: 'claude-haiku-4-5',           // Fast, cheaper (Haiku 4.5, Feb 2025)
+    balanced: 'claude-sonnet-4-5',      // Balanced (Sept 2025)
+    best: 'claude-opus-4-6'             // High quality (Opus 4.6, March 2026)
+  },
+  groq: {
+    // NOTE: Groq vision support requires different model
+    // For vision: meta-llama/llama-4-scout-17b-16e-instruct (preview, supports vision)
+    // For text-only: llama-3.3-70b-versatile is fastest (~0.22s latency)
+    fast: 'meta-llama/llama-4-scout-17b-16e-instruct',   // Vision-capable, fastest Groq option
+    balanced: 'meta-llama/llama-4-scout-17b-16e-instruct', // Vision-capable, balanced
+    best: 'meta-llama/llama-4-scout-17b-16e-instruct'   // Vision-capable, high quality (preview)
+    // WARNING: Groq vision models are preview-only. Text-only: use llama-3.3-70b-versatile
+  },
+  openrouter: {
+    // OpenRouter provides access to multiple models via unified API
+    fast: 'anthropic/claude-haiku-4-5',       // Fast, cheaper via OpenRouter
+    balanced: 'anthropic/claude-sonnet-4-5',  // Balanced via OpenRouter
+    best: 'anthropic/claude-opus-4-6'         // High quality via OpenRouter
+  }
+};
+/**
+ * Default provider configurations
+ *
+ * Fields on each entry:
+ * - name: provider key (matches the object key)
+ * - apiUrl: base URL of the provider's API
+ * - model: default model identifier for the provider
+ * - freeTier: whether a free tier is available
+ * - pricing: { input, output } cost figures; the groq entry documents these
+ *   as USD per 1M tokens — presumably the same unit applies to every
+ *   provider (TODO confirm)
+ * - priority: numeric rank; groq's 0 is annotated as the highest priority,
+ *   so lower values presumably win (TODO confirm against the selection code).
+ *   NOTE(review): openai and openrouter both use priority 2.
+ * - latency / throughput: only set for groq (milliseconds, tokens/sec)
+ * - visionSupported: only set for groq and openrouter
+ *
+ * NOTE(review): the openrouter default model ('anthropic/claude-sonnet-4')
+ * differs from MODEL_TIERS.openrouter.balanced ('anthropic/claude-sonnet-4-5')
+ * — confirm whether this is intentional.
+ *
+ * GROQ INTEGRATION:
+ * - OpenAI-compatible API (easy migration)
+ * - ~0.22s latency (10x faster than typical providers)
+ * - Useful for high-frequency decisions (10-60Hz temporal decisions)
+ * - Free tier available for testing
+ */
+export const PROVIDER_CONFIGS = {
+  gemini: {
+    name: 'gemini',
+    apiUrl: 'https://generativelanguage.googleapis.com/v1beta',
+    model: 'gemini-2.5-flash',            // Latest stable (June 2025)
+    freeTier: true,
+    pricing: { input: 0.10, output: 0.40 }, // 2.5 Flash is cheaper
+    priority: 1
+  },
+  openai: {
+    name: 'openai',
+    apiUrl: 'https://api.openai.com/v1',
+    model: 'gpt-4o',                    // Current production
+    freeTier: false,
+    pricing: { input: 5.00, output: 15.00 },
+    priority: 2
+  },
+  claude: {
+    name: 'claude',
+    apiUrl: 'https://api.anthropic.com/v1',
+    model: 'claude-sonnet-4-5',         // Latest flagship (Sept 2025)
+    freeTier: false,
+    pricing: { input: 3.00, output: 15.00 },
+    priority: 3
+  },
+  groq: {
+    name: 'groq',
+    apiUrl: 'https://api.groq.com/openai/v1', // OpenAI-compatible endpoint
+    model: 'meta-llama/llama-4-scout-17b-16e-instruct',   // Vision-capable (preview), ~0.22s latency
+    freeTier: true,                      // Free tier available
+    pricing: { input: 0.59, output: 0.79 }, // Actual 2025 pricing: $0.59/$0.79 per 1M tokens (real-time API)
+    priority: 0,                         // Highest priority for high-frequency decisions
+    latency: 220,                        // ~0.22s latency in ms (10x faster than typical)
+    throughput: 200,                     // ~200 tokens/sec average
+    visionSupported: true               // llama-4-scout-17b-16e-instruct supports vision (preview)
+    // Text-only alternative: llama-3.3-70b-versatile (faster, no vision)
+  },
+  openrouter: {
+    name: 'openrouter',
+    apiUrl: 'https://openrouter.ai/api/v1', // OpenAI-compatible endpoint
+    model: 'anthropic/claude-sonnet-4',     // Default to Claude Sonnet via OpenRouter
+    freeTier: false,
+    pricing: { input: 3.00, output: 15.00 }, // Varies by model
+    priority: 2,
+    visionSupported: true
+  }
+};

package/src/render-change-detector.mjs CHANGED Viewed

@@ -253,6 +253,11 @@ export function calculateOptimalFPS(changeHistory, options = {}) {
     targetChangeInterval = 100 // Target: capture every 100ms of changes
   } = options;
+  // Handle null/undefined or non-array input
+  if (!changeHistory || !Array.isArray(changeHistory)) {
+    return minFPS;
+  }
   if (changeHistory.length < 2) {
     return minFPS;
   }

package/src/research-enhanced-validation.mjs CHANGED Viewed

@@ -307,12 +307,14 @@ export async function validateWithExplicitRubric(imagePath, prompt, options = {}
   // Import rubric builder
   const { buildRubricPrompt, DEFAULT_RUBRIC } = await import('./rubrics.mjs');
-  // Build prompt with explicit rubric
+  // Build prompt: user prompt + rubric evaluation framework.
+  // buildRubricPrompt(rubric, includeDimensions) returns the rubric text;
+  // we prepend the user's prompt so both are sent to the VLM.
+  const rubricToUse = rubric || (useDefaultRubric ? DEFAULT_RUBRIC : null);
   let enhancedPrompt = prompt;
-  if (useDefaultRubric && !rubric) {
-    enhancedPrompt = buildRubricPrompt(prompt, DEFAULT_RUBRIC);
-  } else if (rubric) {
-    enhancedPrompt = buildRubricPrompt(prompt, rubric);
+  if (rubricToUse) {
+    const rubricText = buildRubricPrompt(rubricToUse, true);
+    enhancedPrompt = `${prompt}\n\n${rubricText}`;
   }
   // Perform validation

package/src/retry.mjs CHANGED Viewed

@@ -7,6 +7,7 @@
 import { ProviderError, TimeoutError } from './errors.mjs';
 import { log, warn } from './logger.mjs';
+import { RETRY_CONSTANTS } from './constants.mjs';
 /**
  * Check if an error is retryable
@@ -50,12 +51,12 @@ export function isRetryableError(error) {
  * @param {boolean} jitter - Add random jitter to prevent thundering herd
  * @returns {number} Delay in milliseconds
  */
-export function calculateBackoff(attempt, baseDelay = 1000, maxDelay = 30000, jitter = true) {
+export function calculateBackoff(attempt, baseDelay = RETRY_CONSTANTS.DEFAULT_BASE_DELAY_MS, maxDelay = RETRY_CONSTANTS.DEFAULT_MAX_DELAY_MS, jitter = true) {
   const exponentialDelay = Math.min(baseDelay * Math.pow(2, attempt), maxDelay);
   if (jitter) {
-    // Add ±25% random jitter
-    const jitterAmount = exponentialDelay * 0.25;
+    // Add random jitter to prevent thundering herd
+    const jitterAmount = exponentialDelay * RETRY_CONSTANTS.JITTER_PERCENTAGE;
     const jitterValue = (Math.random() * 2 - 1) * jitterAmount;
     return Math.max(0, exponentialDelay + jitterValue);
   }
@@ -80,9 +81,9 @@ export function calculateBackoff(attempt, baseDelay = 1000, maxDelay = 30000, ji
  */
 export async function retryWithBackoff(fn, options = {}) {
   const {
-    maxRetries = 3,
-    baseDelay = 1000,
-    maxDelay = 30000,
+    maxRetries = RETRY_CONSTANTS.DEFAULT_MAX_RETRIES,
+    baseDelay = RETRY_CONSTANTS.DEFAULT_BASE_DELAY_MS,
+    maxDelay = RETRY_CONSTANTS.DEFAULT_MAX_DELAY_MS,
     onRetry = null,
     retryable = isRetryableError
   } = options;
@@ -117,7 +118,20 @@ export async function retryWithBackoff(fn, options = {}) {
     }
   }
-  // All retries exhausted
+  // All retries exhausted - enhance error message with retry context
+  const enhancedMessage = enhanceErrorMessage(
+    lastError,
+    maxRetries + 1,
+    'retryWithBackoff'
+  );
+  // Preserve original error but enhance message
+  if (lastError instanceof Error) {
+    lastError.message = enhancedMessage;
+  } else {
+    lastError = new Error(enhancedMessage);
+  }
   throw lastError;
 }

package/src/rubrics.mjs CHANGED Viewed

@@ -162,6 +162,10 @@ Provide your evaluation as JSON:
     "visual": "<visual evidence from screenshot>",
     "functional": "<functional evidence>",
     "accessibility": "<accessibility evidence>"
+  },
+  "dimensionScores": {
+    "<dimension_name>": <0-10 integer>,
+    ...for each dimension in the rubric
   }
 }`;

package/src/safe-logger.mjs ADDED Viewed

@@ -0,0 +1,71 @@
+/**
+ * Performance Logger Service
+ *
+ * Handles optional performance logging without fire-and-forget race conditions.
+ * Provides a safe, non-blocking interface for metrics.
+ */
+let loggerImplementation = null; // Set by initLogger(): the loaded logging functions, or no-op stand-ins if loading failed
+let isEnabled = false; // True only after the performance logger module was loaded successfully
+/**
+ * Initialize the logger (lazy load)
+ */
+async function initLogger() {
+  if (loggerImplementation) return loggerImplementation;
+  try {
+    // Only load if actually needed/configured
+    const { logCacheOperation, logTemporalDecision } = await import('./utils/performance-logger.mjs');
+    loggerImplementation = { logCacheOperation, logTemporalDecision };
+    isEnabled = true;
+  } catch (error) {
+    // Graceful degradation - if logger fails to load, we just don't log
+    // This is better than crashing or hanging
+    isEnabled = false;
+    loggerImplementation = {
+      logCacheOperation: () => {},
+      logTemporalDecision: () => {}
+    };
+  }
+  return loggerImplementation;
+}
+/**
+ * Log a cache operation safely
+ */
+export function safeLogCacheOperation(data) {
+  // Non-blocking check
+  if (!isEnabled && !loggerImplementation) {
+    // Trigger init but don't wait for it (fire and forget INIT, not the log itself)
+    initLogger().then(logger => logger?.logCacheOperation(data)).catch(() => {});
+    return;
+  }
+  if (isEnabled && loggerImplementation) {
+    try {
+      loggerImplementation.logCacheOperation(data);
+    } catch {
+      // Swallow logging errors to prevent disrupting main flow
+    }
+  }
+}
+/**
+ * Log a temporal decision safely
+ */
+export function safeLogTemporalDecision(data) {
+  if (!isEnabled && !loggerImplementation) {
+    initLogger().then(logger => logger?.logTemporalDecision(data)).catch(() => {});
+    return;
+  }
+  if (isEnabled && loggerImplementation) {
+    try {
+      loggerImplementation.logTemporalDecision(data);
+    } catch {
+      // Swallow logging errors
+    }
+  }
+}

package/src/session-cost-tracker.mjs ADDED Viewed

@@ -0,0 +1,320 @@
+/**
+ * Session-Level Cost Tracker
+ *
+ * Tracks costs per test run/session with detailed breakdown and transparency.
+ * Provides "trap debug" hooks to show total ML API resources for usage tracking.
+ *
+ * Usage:
+ * ```javascript
+ * import { startSession, endSession, getSessionCosts } from './session-cost-tracker.mjs';
+ *
+ * const sessionId = startSession('comprehensive-evaluation');
+ * // ... run tests ...
+ * const summary = endSession(sessionId);
+ * console.log(`Total cost: $${summary.totalCost.toFixed(4)}`);
+ * ```
+ */
+import { getCostTracker, recordCost } from './cost-tracker.mjs';
+import { getCacheStats } from './cache.mjs';
+import { log, warn } from './logger.mjs';
+import { writeFileSync, mkdirSync, existsSync } from 'fs';
+import { join } from 'path';
+/**
+ * Active sessions, keyed by session ID.
+ * Entries are added by startSession() and removed by endSession().
+ */
+const activeSessions = new Map();
+/**
+ * Session cost data structure
+ */
+class SessionCostData {
+  constructor(sessionId, name) {
+    this.sessionId = sessionId;
+    this.name = name;
+    this.startTime = Date.now();
+    this.endTime = null;
+    this.costs = {
+      total: 0,
+      byProvider: {},
+      byTest: {},
+      apiCalls: 0,
+      cacheHits: 0,
+      cacheMisses: 0,
+      tokens: {
+        input: 0,
+        output: 0,
+        total: 0
+      }
+    };
+    this.entries = [];
+  }
+  recordCostEntry(entry) {
+    this.costs.total += entry.cost || 0;
+    this.costs.apiCalls += 1;
+    // Track by provider
+    if (!this.costs.byProvider[entry.provider]) {
+      this.costs.byProvider[entry.provider] = { total: 0, calls: 0, tokens: { input: 0, output: 0 } };
+    }
+    this.costs.byProvider[entry.provider].total += entry.cost || 0;
+    this.costs.byProvider[entry.provider].calls += 1;
+    this.costs.byProvider[entry.provider].tokens.input += entry.inputTokens || 0;
+    this.costs.byProvider[entry.provider].tokens.output += entry.outputTokens || 0;
+    // Track by test
+    const testName = entry.testName || 'unknown';
+    if (!this.costs.byTest[testName]) {
+      this.costs.byTest[testName] = { total: 0, calls: 0 };
+    }
+    this.costs.byTest[testName].total += entry.cost || 0;
+    this.costs.byTest[testName].calls += 1;
+    // Track tokens
+    this.costs.tokens.input += entry.inputTokens || 0;
+    this.costs.tokens.output += entry.outputTokens || 0;
+    this.costs.tokens.total = this.costs.tokens.input + this.costs.tokens.output;
+    // Store entry
+    this.entries.push({
+      ...entry,
+      timestamp: entry.timestamp || Date.now()
+    });
+  }
+  recordCacheHit() {
+    this.costs.cacheHits += 1;
+  }
+  recordCacheMiss() {
+    this.costs.cacheMisses += 1;
+  }
+  getSummary() {
+    const duration = (this.endTime || Date.now()) - this.startTime;
+    const cacheHitRate = this.costs.cacheHits + this.costs.cacheMisses > 0
+      ? (this.costs.cacheHits / (this.costs.cacheHits + this.costs.cacheMisses) * 100).toFixed(1)
+      : 0;
+    return {
+      sessionId: this.sessionId,
+      name: this.name,
+      duration: duration,
+      durationSeconds: (duration / 1000).toFixed(2),
+      costs: {
+        ...this.costs,
+        cacheHitRate: `${cacheHitRate}%`,
+        averageCostPerCall: this.costs.apiCalls > 0
+          ? (this.costs.total / this.costs.apiCalls).toFixed(6)
+          : 0,
+        costPerSecond: duration > 0
+          ? ((this.costs.total / duration) * 1000).toFixed(6)
+          : 0
+      },
+      startTime: new Date(this.startTime).toISOString(),
+      endTime: this.endTime ? new Date(this.endTime).toISOString() : null
+    };
+  }
+}
+/**
+ * Start a new cost tracking session
+ *
+ * @param {string} name - Session name (e.g., 'comprehensive-evaluation')
+ * @param {object} [options] - Session options
+ * @returns {string} Session ID
+ */
+export function startSession(name, options = {}) {
+  const sessionId = `session-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
+  const session = new SessionCostData(sessionId, name);
+  activeSessions.set(sessionId, session);
+  if (options.verbose !== false) {
+    log(`[CostTracker] Started session: ${name} (${sessionId})`);
+  }
+  return sessionId;
+}
+/**
+ * End a cost tracking session
+ *
+ * @param {string} sessionId - Session ID
+ * @param {object} [options] - Options
+ * @returns {object} Session summary
+ */
+export function endSession(sessionId, options = {}) {
+  const session = activeSessions.get(sessionId);
+  if (!session) {
+    warn(`[CostTracker] Session not found: ${sessionId}`);
+    return null;
+  }
+  session.endTime = Date.now();
+  const summary = session.getSummary();
+  // Get cache stats
+  try {
+    const cacheStats = getCacheStats();
+    summary.cacheStats = cacheStats;
+  } catch (error) {
+    // Silently fail if cache stats unavailable
+  }
+  // Save session report
+  if (options.saveReport !== false) {
+    const reportsDir = join(process.cwd(), 'evaluation', 'results', 'cost-reports');
+    if (!existsSync(reportsDir)) {
+      mkdirSync(reportsDir, { recursive: true });
+    }
+    const reportFile = join(reportsDir, `cost-report-${sessionId}-${Date.now()}.json`);
+    writeFileSync(reportFile, JSON.stringify({
+      summary,
+      entries: session.entries,
+      timestamp: new Date().toISOString()
+    }, null, 2));
+    if (options.verbose !== false) {
+      log(`[CostTracker] Session report saved: ${reportFile}`);
+    }
+  }
+  // Print summary if verbose
+  if (options.verbose !== false) {
+    printSessionSummary(summary);
+  }
+  activeSessions.delete(sessionId);
+  return summary;
+}
+/**
+ * Record cost for current session
+ *
+ * @param {string} sessionId - Session ID
+ * @param {object} costData - Cost data
+ */
+export function recordSessionCost(sessionId, costData) {
+  const session = activeSessions.get(sessionId);
+  if (session) {
+    session.recordCostEntry(costData);
+    // Also record in global cost tracker
+    recordCost(costData);
+  } else {
+    // No active session, just record globally
+    recordCost(costData);
+  }
+}
+/**
+ * Record cache hit for current session
+ *
+ * @param {string} sessionId - Session ID
+ */
+export function recordSessionCacheHit(sessionId) {
+  const session = activeSessions.get(sessionId);
+  if (session) {
+    session.recordCacheHit();
+  }
+}
+/**
+ * Record cache miss for current session
+ *
+ * @param {string} sessionId - Session ID
+ */
+export function recordSessionCacheMiss(sessionId) {
+  const session = activeSessions.get(sessionId);
+  if (session) {
+    session.recordCacheMiss();
+  }
+}
+/**
+ * Get current session costs
+ *
+ * @param {string} sessionId - Session ID
+ * @returns {object} Current session costs
+ */
+export function getSessionCosts(sessionId) {
+  const session = activeSessions.get(sessionId);
+  if (!session) {
+    return null;
+  }
+  return session.getSummary();
+}
+/**
+ * Print session summary
+ */
+function printSessionSummary(summary) {
+  console.log('\n' + '='.repeat(70));
+  console.log(`💰 Cost Report: ${summary.name}`);
+  console.log('='.repeat(70));
+  console.log(`Session ID: ${summary.sessionId}`);
+  console.log(`Duration: ${summary.durationSeconds}s`);
+  console.log(`\n📊 API Usage:`);
+  console.log(`   Total Cost: $${summary.costs.total.toFixed(4)}`);
+  console.log(`   API Calls: ${summary.costs.apiCalls}`);
+  console.log(`   Average per Call: $${summary.costs.averageCostPerCall}`);
+  console.log(`   Cost per Second: $${summary.costs.costPerSecond}/s`);
+  console.log(`\n💾 Cache Performance:`);
+  console.log(`   Cache Hits: ${summary.costs.cacheHits}`);
+  console.log(`   Cache Misses: ${summary.costs.cacheMisses}`);
+  console.log(`   Hit Rate: ${summary.costs.cacheHitRate}`);
+  const cacheSavings = summary.costs.cacheHits * parseFloat(summary.costs.averageCostPerCall);
+  if (cacheSavings > 0) {
+    console.log(`   Estimated Savings: $${cacheSavings.toFixed(4)} (from cache hits)`);
+  }
+  console.log(`\n🔢 Token Usage:`);
+  console.log(`   Input Tokens: ${summary.costs.tokens.input.toLocaleString()}`);
+  console.log(`   Output Tokens: ${summary.costs.tokens.output.toLocaleString()}`);
+  console.log(`   Total Tokens: ${summary.costs.tokens.total.toLocaleString()}`);
+  if (Object.keys(summary.costs.byProvider).length > 0) {
+    console.log(`\n📦 By Provider:`);
+    for (const [provider, data] of Object.entries(summary.costs.byProvider)) {
+      console.log(`   ${provider}:`);
+      console.log(`      Cost: $${data.total.toFixed(4)}`);
+      console.log(`      Calls: ${data.calls}`);
+      console.log(`      Tokens: ${data.tokens.input.toLocaleString()} in, ${data.tokens.output.toLocaleString()} out`);
+    }
+  }
+  if (Object.keys(summary.costs.byTest).length > 0) {
+    console.log(`\n🧪 By Test (Top 10):`);
+    const sortedTests = Object.entries(summary.costs.byTest)
+      .sort((a, b) => b[1].total - a[1].total)
+      .slice(0, 10);
+    for (const [testName, data] of sortedTests) {
+      console.log(`   ${testName}: $${data.total.toFixed(4)} (${data.calls} calls)`);
+    }
+  }
+  console.log('='.repeat(70) + '\n');
+}
+/**
+ * Get all active sessions
+ *
+ * @returns {Array} Active session IDs
+ */
+export function getActiveSessions() {
+  return Array.from(activeSessions.keys());
+}
+/**
+ * Get global cost stats (across all sessions)
+ *
+ * @returns {object} Global cost statistics
+ */
+export function getGlobalCostStats() {
+  const tracker = getCostTracker();
+  return tracker.getStats();
+}

package/src/smart-validator.mjs CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * Smart Validator Selector
  *
- * Automatically selects the best validator type based on available context.
+ * Selects validator type based on available context.
  * Guides users to the right tool for the job.
  *
  * Design Philosophy:
@@ -36,10 +36,10 @@ import { log, warn } from './logger.mjs';
 /**
  * Smart accessibility validation
  *
- * Automatically chooses the best validator based on available context:
+ * Chooses validator based on available context:
  * - Has page access → uses programmatic (fast, deterministic)
  * - Only has screenshot → uses VLLM (semantic evaluation)
- * - Has both and needs semantic context → uses hybrid (best of both)
+ * - Has both and needs semantic context → uses hybrid (combines both)
  *
  * @param {Object} options - Validation options
  * @param {any} [options.page] - Playwright page object (if available)
@@ -75,7 +75,7 @@ export async function validateAccessibilitySmart(options = {}) {
   // Decision tree:
   // 1. Has page access → use programmatic (fast, deterministic)
-  // 2. Has both + need semantic → use hybrid (best of both)
+  // 2. Has both + need semantic → use hybrid (combines both)
   // 3. Only screenshot → use VLLM (semantic evaluation)
   if (page && !shouldUseHybrid) {
@@ -132,9 +132,9 @@ export async function validateAccessibilitySmart(options = {}) {
 /**
  * Smart state validation
  *
- * Automatically chooses the best validator based on available context:
+ * Chooses validator based on available context:
  * - Has page access + direct state → uses programmatic (fast, deterministic)
- * - Has page access + screenshot + need semantic → uses hybrid (best of both)
+ * - Has page access + screenshot + need semantic → uses hybrid (combines both)
  * - Only screenshot → uses VLLM (extracts state from screenshot)
  *
  * @param {Object} options - Validation options
@@ -236,7 +236,7 @@ export async function validateStateSmart(options = {}) {
 /**
  * Smart element validation
  *
- * Validates element visibility, position, contrast, etc. using the best available method.
+ * Validates element visibility, position, contrast, etc. using available methods.
  *
  * @param {Object} options - Validation options
  * @param {any} options.page - Playwright page object
@@ -317,7 +317,7 @@ export async function validateElementSmart(options = {}) {
 /**
  * Smart validation with automatic tool selection
  *
- * This is the main entry point that automatically selects the best validator
+ * Main entry point that selects validator
  * based on what you're trying to validate and what context you have.
  *
  * @param {Object} options - Validation options