npm - @arclabs561/ai-visual-test - Versions diffs - 0.5.1 → 0.7.4 - Mend

@arclabs561/ai-visual-test 0.5.1 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74)

package/CHANGELOG.md +127 -11
package/DEPLOYMENT.md +225 -9
package/README.md +71 -80
package/index.d.ts +902 -5
package/package.json +10 -51
package/src/batch-optimizer.mjs +39 -0
package/src/cache.mjs +241 -16
package/src/config.mjs +33 -91
package/src/constants.mjs +54 -0
package/src/convenience.mjs +113 -10
package/src/cost-optimization.mjs +1 -0
package/src/cost-tracker.mjs +134 -2
package/src/data-extractor.mjs +36 -7
package/src/dynamic-few-shot.mjs +69 -11
package/src/errors.mjs +6 -2
package/src/experience-propagation.mjs +12 -0
package/src/experience-tracer.mjs +12 -3
package/src/game-player.mjs +222 -43
package/src/graceful-shutdown.mjs +126 -0
package/src/helpers/playwright.mjs +22 -8
package/src/human-validation-manager.mjs +99 -2
package/src/index.mjs +48 -3
package/src/integrations/playwright.mjs +140 -0
package/src/judge.mjs +699 -24
package/src/load-env.mjs +2 -1
package/src/logger.mjs +31 -3
package/src/model-tier-selector.mjs +1 -221
package/src/natural-language-specs.mjs +31 -3
package/src/persona-enhanced.mjs +4 -2
package/src/persona-experience.mjs +1 -1
package/src/pricing.mjs +28 -0
package/src/prompt-composer.mjs +162 -5
package/src/provider-data.mjs +115 -0
package/src/render-change-detector.mjs +5 -0
package/src/research-enhanced-validation.mjs +7 -5
package/src/retry.mjs +21 -7
package/src/rubrics.mjs +4 -0
package/src/safe-logger.mjs +71 -0
package/src/session-cost-tracker.mjs +320 -0
package/src/smart-validator.mjs +8 -8
package/src/spec-templates.mjs +52 -6
package/src/startup-validation.mjs +127 -0
package/src/temporal-adaptive.mjs +2 -2
package/src/temporal-decision-manager.mjs +1 -271
package/src/temporal-logic.mjs +104 -0
package/src/temporal-note-pruner.mjs +119 -0
package/src/temporal-preprocessor.mjs +1 -543
package/src/temporal.mjs +681 -79
package/src/utils/action-hallucination-detector.mjs +301 -0
package/src/utils/baseline-validator.mjs +82 -0
package/src/utils/cache-stats.mjs +104 -0
package/src/utils/cached-llm.mjs +164 -0
package/src/utils/capability-stratifier.mjs +108 -0
package/src/utils/counterfactual-tester.mjs +83 -0
package/src/utils/error-recovery.mjs +117 -0
package/src/utils/explainability-scorer.mjs +119 -0
package/src/utils/exploratory-automation.mjs +131 -0
package/src/utils/index.mjs +10 -0
package/src/utils/intent-recognizer.mjs +201 -0
package/src/utils/log-sanitizer.mjs +165 -0
package/src/utils/path-validator.mjs +88 -0
package/src/utils/performance-logger.mjs +316 -0
package/src/utils/performance-measurement.mjs +280 -0
package/src/utils/prompt-sanitizer.mjs +213 -0
package/src/utils/rate-limiter.mjs +144 -0
package/src/validation-framework.mjs +24 -20
package/src/validation-result-normalizer.mjs +35 -1
package/src/validation.mjs +75 -25
package/src/validators/accessibility-validator.mjs +144 -0
package/src/validators/hybrid-validator.mjs +48 -4
package/api/health.js +0 -34
package/api/validate.js +0 -252
package/public/index.html +0 -149
package/vercel.json +0 -27

package/src/game-player.mjs CHANGED Viewed

@@ -49,19 +49,45 @@ export async function decideGameAction(gameState, goal, history = []) {
     }
   );
-  // Use VLLM to decide action
-  const actionPrompt = `Based on the game state, decide what action to take.
-    Goal: ${goal}
-    Current state: ${stateEvaluation.reasoning?.substring(0, 200) || 'Unknown'}
-    Previous actions: ${recentHistory.slice(-3).map(h => h.action?.key || h.action?.type || 'unknown').join(', ')}
+  // Enhanced Prompt with Reflexion and Chain of Thought
+  let reflexionContext = '';
+  const lastStep = recentHistory[recentHistory.length - 1];
+  if (lastStep && lastStep.result?.score !== undefined) {
+    const scoreDelta = (stateEvaluation.score || 0) - (lastStep.result.score || 0);
+    if (scoreDelta < 0) {
+      reflexionContext = `CRITICAL REFLEXION: The previous action (${JSON.stringify(lastStep.action)}) caused the score to drop by ${Math.abs(scoreDelta)}.
+      Analyze WHY this failed before choosing the next action. Avoid repeating the same mistake.`;
+    } else if (scoreDelta > 0) {
+      reflexionContext = `SUCCESS ANALYSIS: The previous action (${JSON.stringify(lastStep.action)}) increased the score by ${scoreDelta}. Continue this successful strategy.`;
+    }
+  }
+  const actionPrompt = `You are an expert game-playing agent. Your goal is: "${goal}".
+    ${reflexionContext}
+    CURRENT STATE:
+    - Visual Analysis: ${stateEvaluation.reasoning?.substring(0, 300) || 'No analysis available'}
+    - Score: ${stateEvaluation.score}
+    - History: ${recentHistory.length} steps taken
+    INSTRUCTIONS:
+    1. THINK: Analyze the game state and physics step-by-step. Anticipate the consequences of moving Left, Right, Up, or Down.
+    2. PLAN: Formulate a short-term plan (next 3 steps).
+    3. ACT: Choose the single best immediate action.
+    Return JSON only:
+    {
+      "thought_process": "Step-by-step reasoning...",
+      "plan": "Short term plan...",
+      "type": "keyboard",
+      "key": "ArrowRight"
+    }
-    Return action as JSON: { "type": "keyboard", "key": "ArrowRight" }
     Available actions:
     - keyboard: ArrowLeft, ArrowRight, ArrowUp, ArrowDown, Space, Enter
     - click: { "type": "click", "selector": "#button" }
-    - wait: { "type": "wait", "duration": 100 }
-    Choose the action that best achieves the goal.`;
+    - wait: { "type": "wait", "duration": 100 }`;
   const actionResult = await validateScreenshot(
     gameState.screenshot,
@@ -69,7 +95,8 @@ export async function decideGameAction(gameState, goal, history = []) {
     {
       extractStructured: true,
       testType: 'gameplay-decision',
-      goal: goal
+      goal: goal,
+      temperature: 0.2 // Lower temperature for more deterministic gameplay
     }
   );
@@ -79,6 +106,10 @@ export async function decideGameAction(gameState, goal, history = []) {
     try {
       const parsed = JSON.parse(actionMatch[0]);
       if (parsed.type && (parsed.key || parsed.selector || parsed.duration !== undefined)) {
+        // Log thought process for debugging/transparency
+        if (parsed.thought_process) {
+          log(`[GamePlayer] Agent Thought: ${parsed.thought_process}`);
+        }
         return parsed;
       }
     } catch (e) {
@@ -107,24 +138,46 @@ export async function decideGameAction(gameState, goal, history = []) {
  * @param {Object} action - Action to execute
  */
 export async function executeGameAction(page, action) {
-  switch (action.type) {
-    case 'keyboard':
-      await page.keyboard.press(action.key);
-      break;
-    case 'click':
-      if (action.selector) {
-        await page.click(action.selector);
-      } else {
-        warn('[GamePlayer] Click action missing selector');
-      }
-      break;
-    case 'wait':
-      await page.waitForTimeout(action.duration || 100);
-      break;
-    default:
-      warn(`[GamePlayer] Unknown action type: ${action.type}, defaulting to wait`);
-      await page.waitForTimeout(100);
+  let executionResult = { success: false, error: null };
+  try {
+    switch (action.type) {
+      case 'keyboard':
+        await page.keyboard.press(action.key);
+        executionResult.success = true;
+        break;
+      case 'click':
+        if (action.selector) {
+          // Verify element exists before clicking
+          const exists = await page.locator(action.selector).count() > 0;
+          if (!exists) {
+            executionResult.success = false;
+            executionResult.error = `Element not found: ${action.selector}`;
+            return executionResult;
+          }
+          await page.click(action.selector);
+          executionResult.success = true;
+        } else {
+          warn('[GamePlayer] Click action missing selector');
+          executionResult.error = 'Click action missing selector';
+        }
+        break;
+      case 'wait':
+        await page.waitForTimeout(action.duration || 100);
+        executionResult.success = true;
+        break;
+      default:
+        warn(`[GamePlayer] Unknown action type: ${action.type}, defaulting to wait`);
+        await page.waitForTimeout(100);
+        executionResult.success = true;
+    }
+  } catch (error) {
+    executionResult.success = false;
+    executionResult.error = error.message;
   }
+  return executionResult;
 }
 /**
@@ -189,39 +242,165 @@ export async function playGame(page, options = {}) {
       const screenshotPath = join(screenshotDir, `gameplay-step-${step}.png`);
       writeFileSync(screenshotPath, screenshot);
+      // 2. Extract game state from page (if available)
+      let gameState = null;
+      try {
+        gameState = await page.evaluate(() => {
+          // Try multiple ways to get game state
+          if (window.gameState) {
+            return window.gameState;
+          }
+          // Try common game state patterns
+          if (window.game) {
+            return {
+              score: window.game.score || 0,
+              level: window.game.level || 0,
+              lives: window.game.lives || 0,
+              gameActive: window.game.active !== false
+            };
+          }
+          // Try to extract from DOM
+          const scoreEl = document.querySelector('#score, .score, [data-score]');
+          const score = scoreEl ? parseInt(scoreEl.textContent?.match(/\d+/)?.[0] || '0') : null;
+          return {
+            score,
+            gameActive: true // Assume active if we can't detect
+          };
+        });
+      } catch (error) {
+        // Game state extraction is optional
+        log(`[GamePlayer] Could not extract game state: ${error.message}`);
+      }
       // 2. Understand current state (validation)
       currentState = {
         screenshot: screenshotPath,
         step,
-        timestamp: Date.now()
+        timestamp: Date.now(),
+        gameState // Include extracted game state
       };
-      const stateEvaluation = await validateScreenshot(
-        screenshotPath,
-        `Evaluate current game state. Goal: ${goal}`,
-        {
-          testType: 'gameplay',
-          temporalNotes: history.map(h => ({
-            step: h.step,
-            action: h.action,
-            result: h.result?.score
-          }))
+      // Use TemporalDecisionManager to reduce LLM calls
+      // Only prompt when decision is needed, not on every state change
+      const temporalNotes = history.map(h => ({
+        step: h.step,
+        action: h.action,
+        result: h.result?.score,
+        timestamp: h.state?.timestamp || Date.now()
+      }));
+      let stateEvaluation;
+      if (step > 0 && history.length > 0) {
+        // Use TemporalDecisionManager for subsequent steps
+        try {
+          const { TemporalDecisionManager } = await import('./temporal-decision-manager.mjs');
+          const decisionManager = new TemporalDecisionManager({
+            minNotesForPrompt: 2,
+            coherenceThreshold: 0.5
+          });
+          const currentState = {
+            score: null,
+            step,
+            timestamp: Date.now()
+          };
+          const previousState = history[history.length - 1]?.result || null;
+          const decision = await decisionManager.shouldPrompt(currentState, previousState, temporalNotes, {
+            stage: 'gameplay',
+            testType: 'gameplay'
+          });
+          if (!decision.shouldPrompt && decision.urgency !== 'high' && previousState) {
+            // Don't prompt yet - reuse previous result
+            stateEvaluation = {
+              ...previousState,
+              skipped: true,
+              skipReason: decision.reason,
+              urgency: decision.urgency
+            };
+          } else {
+            // Prompt now (decision point or high urgency)
+            stateEvaluation = await validateScreenshot(
+              screenshotPath,
+              `Evaluate current game state. Goal: ${goal}`,
+              {
+                testType: 'gameplay',
+                temporalNotes,
+                sequenceIndex: step,
+                useTemporalDecision: true,
+                currentState,
+                previousState,
+                previousResult: previousState
+              }
+            );
+          }
+        } catch (error) {
+          // If TemporalDecisionManager fails, proceed with normal validation
+          stateEvaluation = await validateScreenshot(
+            screenshotPath,
+            `Evaluate current game state. Goal: ${goal}`,
+            {
+              testType: 'gameplay',
+              temporalNotes,
+              sequenceIndex: step
+            }
+          );
         }
-      );
+      } else {
+        // First step - always validate
+        stateEvaluation = await validateScreenshot(
+          screenshotPath,
+          `Evaluate current game state. Goal: ${goal}`,
+          {
+            testType: 'gameplay',
+            temporalNotes,
+            sequenceIndex: step
+          }
+        );
+      }
       currentState.evaluation = stateEvaluation;
       // 3. Decide what action to take (decision-making)
-      const action = await decideGameAction(
+      let action = await decideGameAction(
         currentState,
         goal,
         history
       );
-      log(`[GamePlayer] Step ${step}: score=${stateEvaluation.score}, action=${action.type}:${action.key || action.selector || ''}`);
+      // Try action, with simple retry on failure
+      let actionExecuted = false;
+      let retries = 0;
+      const maxRetries = 2;
-      // 4. Execute action (Playwright)
-      await executeGameAction(page, action);
+      while (!actionExecuted && retries < maxRetries) {
+        log(`[GamePlayer] Step ${step}: score=${stateEvaluation.score}, action=${action.type}:${action.key || action.selector || ''}`);
+        // 4. Execute action (Playwright)
+        const executionResult = await executeGameAction(page, action);
+        if (executionResult.success) {
+          actionExecuted = true;
+          action.executionResult = executionResult;
+        } else {
+          // Action failed - wait and retry, or try simple alternative
+          retries++;
+          if (retries < maxRetries) {
+            const { createExploratoryStrategy } = await import('./utils/exploratory-automation.mjs');
+            const exploratoryStrategy = createExploratoryStrategy({ maxAttempts: 2 });
+            const nextAction = exploratoryStrategy.getNextAction(currentState, [action], goal);
+            if (nextAction) {
+              log(`[GamePlayer] Action failed, trying alternative: ${nextAction.type}`);
+              action = nextAction;
+            } else {
+              // Wait and retry original action
+              await page.waitForTimeout(500);
+            }
+          }
+        }
+      }
       // 5. Wait for next frame
       await page.waitForTimeout(1000 / fps);

package/src/graceful-shutdown.mjs ADDED Viewed

@@ -0,0 +1,126 @@
+/**
+ * Graceful Shutdown Handler
+ *
+ * Handles graceful shutdown for long-running processes.
+ * Ensures in-flight operations complete, caches are flushed, and resources are cleaned up.
+ */
+import { log, warn, error } from './logger.mjs';
+let shutdownHandlers = [];
+let isShuttingDown = false;
+let shutdownTimeout = 30000; // 30 seconds default timeout
+/**
+ * Add a handler to the list of callbacks run during shutdown.
+ *
+ * The list is re-sorted after every insertion so that entries with a
+ * larger priority value come first.
+ *
+ * @param {Function} handler - Async function to call during shutdown
+ * @param {number} [priority=0] - Priority (higher = called first)
+ * @throws {TypeError} If handler is not a function
+ */
+export function registerShutdownHandler(handler, priority = 0) {
+  const isCallable = typeof handler === 'function';
+  if (!isCallable) {
+    throw new TypeError('Shutdown handler must be a function');
+  }
+  const entry = { handler, priority };
+  shutdownHandlers.push(entry);
+  // Keep the list ordered in place: larger priority first
+  shutdownHandlers.sort((left, right) => right.priority - left.priority);
+}
+/**
+ * Remove a previously registered shutdown handler.
+ *
+ * Every entry whose handler is strictly equal to the given function is
+ * dropped; all other entries keep their relative order.
+ *
+ * @param {Function} handler - Handler to remove
+ */
+export function unregisterShutdownHandler(handler) {
+  const remaining = [];
+  for (const entry of shutdownHandlers) {
+    if (entry.handler !== handler) {
+      remaining.push(entry);
+    }
+  }
+  shutdownHandlers = remaining;
+}
+/**
+ * Perform graceful shutdown
+ *
+ * Awaits every registered shutdown handler one after another, in the order
+ * they are stored in `shutdownHandlers`, and then terminates the process.
+ * A handler that throws or rejects is logged and does not prevent the
+ * remaining handlers from running. Calls made while a shutdown is already
+ * in progress only log a warning and return.
+ *
+ * @param {Object} [options={}] - Shutdown options
+ * @param {number} [options.timeout=30000] - Timeout in milliseconds; when exceeded the process is forced to exit with code 1
+ * @param {string} [options.signal='SIGTERM'] - Signal name for logging
+ * @param {number} [options.exitCode] - Exit code used after a completed shutdown; defaults to 1 when signal is 'uncaughtException', otherwise 0
+ * @returns {Promise<void>}
+ */
+export async function gracefulShutdown(options = {}) {
+  if (isShuttingDown) {
+    warn('[GracefulShutdown] Shutdown already in progress');
+    return;
+  }
+  isShuttingDown = true;
+  const { timeout = shutdownTimeout, signal = 'SIGTERM' } = options;
+  // A shutdown triggered by a crash must not report success to the parent process
+  const exitCode = options.exitCode ?? (signal === 'uncaughtException' ? 1 : 0);
+  log(`[GracefulShutdown] Initiating graceful shutdown (signal: ${signal})...`);
+  // Set timeout to force exit if shutdown takes too long
+  const timeoutId = setTimeout(() => {
+    warn('[GracefulShutdown] Shutdown timeout exceeded, forcing exit');
+    process.exit(1);
+  }, timeout);
+  try {
+    // Iterate over a snapshot: a handler that registers another handler
+    // pushes to and re-sorts the live array, which would disturb iteration
+    for (const { handler } of [...shutdownHandlers]) {
+      try {
+        await handler();
+      } catch (err) {
+        warn(`[GracefulShutdown] Handler failed:`, err);
+        // Continue with other handlers even if one fails
+      }
+    }
+    // Note: Cache is file-based and doesn't need explicit flushing
+    // File writes are atomic, so no cleanup needed
+    log('[GracefulShutdown] Cache is file-based, no flush needed');
+    clearTimeout(timeoutId);
+    log('[GracefulShutdown] Shutdown complete');
+    process.exit(exitCode);
+  } catch (err) {
+    clearTimeout(timeoutId);
+    error('[GracefulShutdown] Shutdown failed:', err);
+    process.exit(1);
+  }
+}
+// Guards against installing the process listeners more than once
+let signalHandlersInstalled = false;
+/**
+ * Initialize graceful shutdown handlers
+ *
+ * Registers process listeners for SIGTERM, SIGINT, 'uncaughtException' and
+ * 'unhandledRejection'. The listeners are installed only on the first call;
+ * later calls just update the shutdown timeout, which the signal listeners
+ * read at the moment they fire.
+ *
+ * @param {Object} [options={}] - Initialization options
+ * @param {number} [options.timeout=30000] - Shutdown timeout
+ */
+export function initGracefulShutdown(options = {}) {
+  shutdownTimeout = options.timeout || 30000;
+  if (signalHandlersInstalled) {
+    // Listeners already exist; adding them again would duplicate every log
+    // line and trigger redundant gracefulShutdown calls per signal
+    return;
+  }
+  signalHandlersInstalled = true;
+  // Register signal handlers
+  process.on('SIGTERM', () => {
+    log('[GracefulShutdown] Received SIGTERM');
+    // Fire-and-forget: gracefulShutdown ends by exiting the process
+    void gracefulShutdown({ signal: 'SIGTERM', timeout: shutdownTimeout });
+  });
+  process.on('SIGINT', () => {
+    log('[GracefulShutdown] Received SIGINT (Ctrl+C)');
+    void gracefulShutdown({ signal: 'SIGINT', timeout: shutdownTimeout });
+  });
+  // Handle uncaught exceptions (best-effort cleanup)
+  process.on('uncaughtException', (err) => {
+    error('[GracefulShutdown] Uncaught exception:', err);
+    void gracefulShutdown({ signal: 'uncaughtException', timeout: 5000 }); // Shorter timeout for crashes
+  });
+  // Handle unhandled promise rejections
+  process.on('unhandledRejection', (reason) => {
+    warn('[GracefulShutdown] Unhandled promise rejection:', reason);
+    // Don't shutdown on unhandled rejections (may be recoverable)
+    // But log for monitoring
+  });
+  log('[GracefulShutdown] Graceful shutdown handlers initialized');
+}

package/src/helpers/playwright.mjs CHANGED Viewed

@@ -68,13 +68,27 @@ export async function getPlaywrightPage(options = {}) {
     };
   }
-  const browser = await chromium.launch(options.browserOptions || {});
-  const page = await browser.newPage();
-  return {
-    page,
-    browser,
-    isMock: false
-  };
+  try {
+    const browser = await chromium.launch(options.browserOptions || {});
+    const page = await browser.newPage();
+    return {
+      page,
+      browser,
+      isMock: false
+    };
+  } catch (error) {
+    // Browser executable not found, fallback to mock
+    if (error.message.includes('Executable doesn\'t exist') ||
+        error.message.includes('browserType.launch') ||
+        error.message.includes('Browser not found')) {
+      return {
+        page: createMockPage(),
+        browser: null,
+        isMock: true
+      };
+    }
+    throw error;
+  }
 }

package/src/human-validation-manager.mjs CHANGED Viewed

@@ -5,8 +5,8 @@
  * - Non-blocking: Doesn't slow down evaluations
  * - Automatic: Collects VLLM judgments when enabled
  * - Smart sampling: Requests human validation for interesting cases
- * - Learning: Automatically calibrates based on collected data
- * - Seamless: Works with all existing systems (batching, temporal, personas)
+ * - Learning: Calibrates based on collected data
+ * - Integration: Works with all existing systems (batching, temporal, personas)
  */
 import { warn, log } from './logger.mjs';
@@ -362,6 +362,103 @@ export class HumanValidationManager {
     };
   }
+  /**
+   * Track calibration degradation over screenshot sequences
+   *
+   * Appends one entry per call to `this.sequenceHistory` (created on first
+   * use). Once at least five entries exist, the average confidence of the
+   * first five entries is compared with that of the last five; a drop of
+   * more than 0.15 is reported as degradation. The two windows overlap
+   * until the history holds ten entries.
+   *
+   * @param {number} sequenceIndex - Index in sequence
+   * @param {Object} result - Validation result
+   * @returns {Object} Degradation status
+   */
+  trackSequenceCalibration(sequenceIndex, result) {
+    if (!this.sequenceHistory) {
+      this.sequenceHistory = [];
+    }
+    // Use the neutral 0.5 only when the value is missing or not a finite
+    // number; a reported 0 is meaningful and must not be replaced
+    const numberOr = (value, fallback) => (Number.isFinite(value) ? value : fallback);
+    const entry = {
+      index: sequenceIndex,
+      timestamp: Date.now(),
+      confidence: numberOr(result.confidence, 0.5),
+      uncertainty: numberOr(result.uncertainty, 0.5),
+      score: result.score,
+      logprobs: result.logprobs
+    };
+    this.sequenceHistory.push(entry);
+    // Detect degradation (compare recent vs early)
+    if (this.sequenceHistory.length >= 5) {
+      const recent = this.sequenceHistory.slice(-5);
+      const early = this.sequenceHistory.slice(0, 5);
+      const recentAvgConfidence = recent.reduce((sum, e) => sum + e.confidence, 0) / recent.length;
+      const earlyAvgConfidence = early.reduce((sum, e) => sum + e.confidence, 0) / early.length;
+      const degradation = earlyAvgConfidence - recentAvgConfidence;
+      const degradationThreshold = 0.15; // 15% drop
+      if (degradation > degradationThreshold) {
+        return {
+          degraded: true,
+          degradation,
+          recommendation: 'recalibrate_or_reduce_sequence',
+          suggestedAction: 'Use temporal graph representation or reduce sequence length'
+        };
+      }
+    }
+    return { degraded: false };
+  }
+  /**
+   * Summarise calibration quality over the recorded sequence history.
+   *
+   * Returns quality 'unknown' when fewer than two entries were recorded.
+   * Otherwise the variance and trend of the recorded confidences decide
+   * between 'degrading', 'stable' and 'variable'.
+   *
+   * @returns {Object} Quality label plus variance/trend when available
+   */
+  getSequenceCalibrationMetrics() {
+    const history = this.sequenceHistory;
+    if (!history || history.length < 2) {
+      return { quality: 'unknown', recommendation: 'insufficient_data' };
+    }
+    const confidences = history.map(({ confidence }) => confidence);
+    const variance = this.calculateVariance(confidences);
+    const trend = this.calculateTrend(confidences);
+    // High spread combined with a falling trend marks the sequence as degrading
+    const isDegrading = variance > 0.1 && trend < -0.05;
+    if (isDegrading) {
+      return {
+        quality: 'degrading',
+        variance,
+        trend,
+        recommendation: 'recalibrate_or_reduce_sequence'
+      };
+    }
+    let quality = 'variable';
+    if (variance < 0.05) {
+      quality = 'stable';
+    }
+    return { quality, variance, trend };
+  }
+  /**
+   * Population variance of a list of numbers (0 for an empty list).
+   *
+   * @param {number[]} values - Numbers to analyse
+   * @returns {number} Mean of the squared deviations from the mean
+   */
+  calculateVariance(values) {
+    const count = values.length;
+    if (count === 0) return 0;
+    let total = 0;
+    for (const value of values) {
+      total = total + value;
+    }
+    const mean = total / count;
+    let squaredTotal = 0;
+    for (const value of values) {
+      squaredTotal = squaredTotal + Math.pow(value - mean, 2);
+    }
+    return squaredTotal / count;
+  }
+  /**
+   * Calculate trend of values (positive = increasing, negative = decreasing)
+   *
+   * Splits the list at its midpoint and returns the change of the second
+   * half's average relative to the magnitude of the first half's average.
+   * When the first half averages exactly 0 a relative change is undefined,
+   * so the absolute change is returned instead.
+   *
+   * @param {number[]} values - Numbers in chronological order
+   * @returns {number} Trend value; 0 for fewer than two values
+   */
+  calculateTrend(values) {
+    if (values.length < 2) return 0;
+    const midpoint = Math.floor(values.length / 2);
+    const average = (items) => items.reduce((a, b) => a + b, 0) / items.length;
+    const firstAvg = average(values.slice(0, midpoint));
+    const secondAvg = average(values.slice(midpoint));
+    const change = secondAvg - firstAvg;
+    // Dividing by a zero baseline would yield NaN or Infinity
+    if (firstAvg === 0) return change;
+    // Divide by the magnitude so a negative baseline does not flip the sign
+    return change / Math.abs(firstAvg);
+  }
   /**
    * Apply calibration adjustments to VLLM score
    *

package/src/index.mjs CHANGED Viewed

@@ -17,10 +17,35 @@
 import { loadEnv } from './load-env.mjs';
 loadEnv();
+// Optional: Initialize graceful shutdown (only in Node.js environments, not browser)
+// Use dynamic import to avoid top-level await (fire-and-forget)
+if (typeof process !== 'undefined' && process.env.NODE_ENV !== 'test') {
+  import('./graceful-shutdown.mjs').then(({ initGracefulShutdown }) => {
+    initGracefulShutdown({ timeout: 30000 });
+  }).catch(() => {
+    // Graceful shutdown is optional, don't fail if unavailable
+  });
+}
 import { VLLMJudge, validateScreenshot as _validateScreenshot } from './judge.mjs';
 export { VLLMJudge, _validateScreenshot as validateScreenshot };
+// Export startup validation utilities
+export { validateStartup, validateStartupSoft } from './startup-validation.mjs';
+// Export graceful shutdown utilities
+export { initGracefulShutdown, registerShutdownHandler, gracefulShutdown } from './graceful-shutdown.mjs';
+// Export performance measurement utilities
+export {
+  PerformanceMeasurement,
+  PerformanceProfiler,
+  measureAsync,
+  measureSync,
+  getProfiler
+} from './utils/performance-measurement.mjs';
 /**
  * Extract semantic information from VLLM judgment text
  *
@@ -28,7 +53,7 @@ export { VLLMJudge, _validateScreenshot as validateScreenshot };
  * Useful for custom implementations that need to parse judgment text.
  *
  * @param {string | object} judgment - Judgment text or object from VLLM
- * @returns {import('./index.mjs').SemanticInfo} Structured semantic information with score, issues, assessment, reasoning
+ * @returns {Object} Structured semantic information with score, issues, assessment, reasoning, brutalistViolations (optional), zeroToleranceViolations (optional)
  */
 export function extractSemanticInfo(judgment) {
   // Create a temporary judge instance to access the method
@@ -170,12 +195,31 @@ export {
   calculateBackoff,
   enhanceErrorMessage
 } from './retry.mjs';
+// Cost optimization utilities
+export {
+  calculateCostComparison,
+  optimizeCost
+} from './cost-optimization.mjs';
 export {
   CostTracker,
   getCostTracker,
   recordCost,
-  getCostStats
+  getCostStats,
+  setBudgetLimit,
+  getBudgetStatus
 } from './cost-tracker.mjs';
+// Session-level cost tracking
+export {
+  startSession,
+  endSession,
+  getSessionCosts,
+  recordSessionCost,
+  recordSessionCacheHit,
+  recordSessionCacheMiss,
+  getActiveSessions,
+  getGlobalCostStats
+} from './session-cost-tracker.mjs';
 export {
   DEFAULT_RUBRIC,
   buildRubricPrompt,
@@ -281,7 +325,8 @@ export {
 export {
   testGameplay,
   testBrowserExperience,
-  validateWithGoals
+  validateWithGoals,
+  validatePage
 } from './convenience.mjs';
 // Game playing (optional - requires Playwright)