npm - @arclabs561/ai-visual-test - Versions diffs - 0.5.1 → 0.7.4 - Mend

@arclabs561/ai-visual-test 0.5.1 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74)

package/CHANGELOG.md +127 -11
package/DEPLOYMENT.md +225 -9
package/README.md +71 -80
package/index.d.ts +902 -5
package/package.json +10 -51
package/src/batch-optimizer.mjs +39 -0
package/src/cache.mjs +241 -16
package/src/config.mjs +33 -91
package/src/constants.mjs +54 -0
package/src/convenience.mjs +113 -10
package/src/cost-optimization.mjs +1 -0
package/src/cost-tracker.mjs +134 -2
package/src/data-extractor.mjs +36 -7
package/src/dynamic-few-shot.mjs +69 -11
package/src/errors.mjs +6 -2
package/src/experience-propagation.mjs +12 -0
package/src/experience-tracer.mjs +12 -3
package/src/game-player.mjs +222 -43
package/src/graceful-shutdown.mjs +126 -0
package/src/helpers/playwright.mjs +22 -8
package/src/human-validation-manager.mjs +99 -2
package/src/index.mjs +48 -3
package/src/integrations/playwright.mjs +140 -0
package/src/judge.mjs +699 -24
package/src/load-env.mjs +2 -1
package/src/logger.mjs +31 -3
package/src/model-tier-selector.mjs +1 -221
package/src/natural-language-specs.mjs +31 -3
package/src/persona-enhanced.mjs +4 -2
package/src/persona-experience.mjs +1 -1
package/src/pricing.mjs +28 -0
package/src/prompt-composer.mjs +162 -5
package/src/provider-data.mjs +115 -0
package/src/render-change-detector.mjs +5 -0
package/src/research-enhanced-validation.mjs +7 -5
package/src/retry.mjs +21 -7
package/src/rubrics.mjs +4 -0
package/src/safe-logger.mjs +71 -0
package/src/session-cost-tracker.mjs +320 -0
package/src/smart-validator.mjs +8 -8
package/src/spec-templates.mjs +52 -6
package/src/startup-validation.mjs +127 -0
package/src/temporal-adaptive.mjs +2 -2
package/src/temporal-decision-manager.mjs +1 -271
package/src/temporal-logic.mjs +104 -0
package/src/temporal-note-pruner.mjs +119 -0
package/src/temporal-preprocessor.mjs +1 -543
package/src/temporal.mjs +681 -79
package/src/utils/action-hallucination-detector.mjs +301 -0
package/src/utils/baseline-validator.mjs +82 -0
package/src/utils/cache-stats.mjs +104 -0
package/src/utils/cached-llm.mjs +164 -0
package/src/utils/capability-stratifier.mjs +108 -0
package/src/utils/counterfactual-tester.mjs +83 -0
package/src/utils/error-recovery.mjs +117 -0
package/src/utils/explainability-scorer.mjs +119 -0
package/src/utils/exploratory-automation.mjs +131 -0
package/src/utils/index.mjs +10 -0
package/src/utils/intent-recognizer.mjs +201 -0
package/src/utils/log-sanitizer.mjs +165 -0
package/src/utils/path-validator.mjs +88 -0
package/src/utils/performance-logger.mjs +316 -0
package/src/utils/performance-measurement.mjs +280 -0
package/src/utils/prompt-sanitizer.mjs +213 -0
package/src/utils/rate-limiter.mjs +144 -0
package/src/validation-framework.mjs +24 -20
package/src/validation-result-normalizer.mjs +35 -1
package/src/validation.mjs +75 -25
package/src/validators/accessibility-validator.mjs +144 -0
package/src/validators/hybrid-validator.mjs +48 -4
package/api/health.js +0 -34
package/api/validate.js +0 -252
package/public/index.html +0 -149
package/vercel.json +0 -27

package/src/convenience.mjs CHANGED Viewed

@@ -23,7 +23,7 @@ import { TEMPORAL_CONSTANTS } from './constants.mjs';
 /**
  * Test gameplay with variable goals
  *
- * Complete workflow for testing games with variable goals/prompts.
+ * Workflow for testing games with variable goals/prompts.
  * Originally motivated by interactive web applications that require
  * real-time validation, variable goals, and temporal understanding.
  *
@@ -96,7 +96,11 @@ export async function testGameplay(page, options = {}) {
     evaluations: [],
     aggregated: null,
     consistency: null,
-    propagation: []
+    propagation: [],
+    temporalScreenshots: [], // Initialize to empty array for consistency
+    processedTemporalNotes: null, // Initialize to null
+    temporalGraph: null, // Initialize to null
+    selectedScreenshots: undefined // Only set if >10 screenshots
   };
   try {
@@ -173,8 +177,9 @@ export async function testGameplay(page, options = {}) {
       result.temporalScreenshots = temporalScreenshots;
       trackPropagation('temporal', { count: temporalScreenshots.length }, 'Captured temporal screenshots');
-      // IMPROVEMENT: Use temporal preprocessing if requested (better performance)
-      if (useTemporalPreprocessing && temporalScreenshots.length > 0) {
+        // Use temporal preprocessing by default
+        // Activity-based: high-Hz uses cache, low-Hz does expensive preprocessing
+      if (temporalScreenshots.length > 0) {
         const { createTemporalPreprocessingManager, createAdaptiveTemporalProcessor } = await import('./temporal-preprocessor.mjs');
         const preprocessingManager = createTemporalPreprocessingManager();
         const adaptiveProcessor = createAdaptiveTemporalProcessor(preprocessingManager);
@@ -250,7 +255,7 @@ export async function testGameplay(page, options = {}) {
       // Always return aggregated notes (even if empty) for consistency
     if (allNotes.length > 0) {
       // Use fixed temporal aggregation system
-      const aggregated = aggregateTemporalNotes(allNotes, {
+      const aggregated = await aggregateTemporalNotes(allNotes, {
         windowSize: 5000,
         decayFactor: 0.9
       });
@@ -281,11 +286,58 @@ export async function testGameplay(page, options = {}) {
         };
       }
+      // IMPROVEMENT: Build temporal graph for better coherence understanding
+      try {
+        const { buildTemporalGraph } = await import('./temporal.mjs');
+        const temporalGraph = await buildTemporalGraph(allNotes, {
+          windowSize: 5000,
+          decayFactor: 0.9,
+          useLLM: false, // Use keyword matching for speed in gameplay
+          frequency: fps // Auto-detect extraction method based on frequency
+        });
+        result.temporalGraph = temporalGraph;
+        trackPropagation('temporal-graph', {
+          nodes: temporalGraph.graph?.nodes?.length || 0,
+          edges: temporalGraph.graph?.edges?.length || 0,
+          averageCoherence: temporalGraph.graph?.averageCoherence || 0,
+          entityCount: Object.keys(temporalGraph.graph?.entities || {}).length
+        }, 'Built temporal graph representation');
+      } catch (error) {
+        warn(`[Convenience] Temporal graph building failed: ${error.message}`);
+        result.temporalGraph = null;
+      }
+      // IMPROVEMENT: Select representative screenshots for context window management
+      if (result.temporalScreenshots && result.temporalScreenshots.length > 10) {
+        try {
+          const { selectRepresentativeScreenshots } = await import('./temporal-note-pruner.mjs');
+          const evaluations = allNotes.map(n => ({ score: n.score || 0 }));
+          const selectedScreenshots = selectRepresentativeScreenshots(
+            result.temporalScreenshots,
+            evaluations,
+            {
+              maxScreenshots: 10,
+              strategy: 'keyframes' // Use keyframes for gameplay (captures state changes)
+            }
+          );
+          result.selectedScreenshots = selectedScreenshots;
+          trackPropagation('screenshot-selection', {
+            original: result.temporalScreenshots.length,
+            selected: selectedScreenshots.length,
+            reduction: ((result.temporalScreenshots.length - selectedScreenshots.length) / result.temporalScreenshots.length * 100).toFixed(1) + '%'
+          }, 'Selected representative screenshots for context management');
+        } catch (error) {
+          warn(`[Convenience] Screenshot selection failed: ${error.message}`);
+          result.selectedScreenshots = result.temporalScreenshots; // Fallback to all
+        }
+      }
       trackPropagation('aggregation', {
         windows: aggregated.windows.length,
         coherence: aggregated.coherence,
-        scales: Object.keys(result.aggregatedMultiScale.scales || {})
-      }, 'Aggregated temporal notes with multi-scale');
+        scales: Object.keys(result.aggregatedMultiScale.scales || {}),
+        graphNodes: result.temporalGraph?.graph?.nodes?.length || 0
+      }, 'Aggregated temporal notes with multi-scale and temporal graph');
     } else {
       // Return empty aggregated structure if no notes (for consistency)
       result.aggregated = {
@@ -373,7 +425,7 @@ export async function testGameplay(page, options = {}) {
 /**
  * Test browser experience with multiple stages
  *
- * Complete workflow for testing browser experiences across multiple stages
+ * Workflow for testing browser experiences across multiple stages
  * (initial, form, payment, gameplay, etc.).
  *
  * @param {import('playwright').Page} page - Playwright page object
@@ -486,7 +538,7 @@ export async function testBrowserExperience(page, options = {}) {
       // Aggregate temporal notes across all stages
       const allStageNotes = result.experiences.flatMap(exp => exp.notes || []);
       if (allStageNotes.length > 0) {
-        const stageAggregated = aggregateTemporalNotes(allStageNotes, {
+        const stageAggregated = await aggregateTemporalNotes(allStageNotes, {
           windowSize: 10000,
           decayFactor: 0.9
         });
@@ -581,7 +633,7 @@ export async function validateWithGoals(screenshotPath, options = {}) {
   } else if (context.notes && context.notes.length > 0) {
     // Auto-aggregate if notes provided but not aggregated
     try {
-      temporalNotes = aggregateTemporalNotes(context.notes, {
+      temporalNotes = await aggregateTemporalNotes(context.notes, {
         windowSize: TEMPORAL_CONSTANTS.DEFAULT_WINDOW_SIZE_MS,
         decayFactor: TEMPORAL_CONSTANTS.DEFAULT_DECAY_FACTOR
       });
@@ -615,3 +667,54 @@ export async function validateWithGoals(screenshotPath, options = {}) {
   };
 }
+/**
+ * Validate a Playwright Page directly
+ *
+ * Handles screenshotting, code extraction, and validation in one step.
+ * Reduces boilerplate for common Playwright testing workflows.
+ *
+ * Flow: write a screenshot to a temp file, optionally extract the rendered
+ * code with extractRenderedCode(page), then hand both to validateScreenshot().
+ * The temp file is removed in a `finally` block, so cleanup also runs when the
+ * screenshot or the validation throws.
+ *
+ * NOTE(review): the temp file name is derived only from Date.now(), so two
+ * calls in the same millisecond that share a tempDir would target the same
+ * path - confirm whether concurrent use is expected.
+ *
+ * @param {import('playwright').Page} page - Playwright page object (must expose a `screenshot` function)
+ * @param {string} prompt - Evaluation prompt, forwarded to validateScreenshot
+ * @param {Object} [options={}] - Validation options; the whole object is also spread into the options passed to validateScreenshot
+ * @param {boolean} [options.fullPage] - Capture full page screenshot (default: false)
+ * @param {boolean} [options.captureCode] - Extract rendered code (default: true); only an explicit `false` skips extraction, and renderedCode is then passed as null
+ * @param {string} [options.tempDir] - Directory for temp screenshot (default: os.tmpdir())
+ * @param {boolean} [options.keepScreenshot] - Keep screenshot after validation (default: false)
+ * @returns {Promise<Object>} Validation result, returned unchanged from validateScreenshot
+ * @throws {ValidationError} If `page` is falsy or has no `screenshot` function
+ */
+export async function validatePage(page, prompt, options = {}) {
+  if (!page || typeof page.screenshot !== 'function') {
+    throw new ValidationError('validatePage: page must be a Playwright Page object', { received: typeof page });
+  }
+  // Build the temp screenshot path; the Node built-ins are loaded lazily via dynamic import
+  const fs = await import('fs');
+  const path = await import('path');
+  const os = await import('os');
+  const tempDir = options.tempDir || os.tmpdir();
+  const screenshotPath = path.join(tempDir, `validate-page-${Date.now()}.png`);
+  try {
+    await page.screenshot({ path: screenshotPath, fullPage: options.fullPage ?? false });
+    // Extract code unless the caller explicitly passed captureCode: false
+    let renderedCode = null;
+    if (options.captureCode !== false) {
+      renderedCode = await extractRenderedCode(page);
+    }
+    // Validate the screenshot; renderedCode is listed last so it overrides any options.renderedCode
+    const result = await validateScreenshot(screenshotPath, prompt, {
+      ...options,
+      renderedCode
+    });
+    return result;
+  } finally {
+    // Cleanup unless requested to keep; the existence check covers a screenshot that was never written
+    if (!options.keepScreenshot && fs.existsSync(screenshotPath)) {
+      fs.unlinkSync(screenshotPath);
+    }
+  }
+}

package/src/cost-optimization.mjs ADDED Viewed

@@ -0,0 +1 @@

package/src/cost-tracker.mjs CHANGED Viewed

@@ -2,9 +2,11 @@
  * Cost Tracking Utilities
  *
  * Tracks API costs over time, provides cost estimates, and helps optimize spending.
+ * Includes budget limits and alerting.
  */
 import { getCached, setCached } from './cache.mjs';
+import { warn, log } from './logger.mjs';
 /**
  * Cost Tracker Class
@@ -22,11 +24,21 @@ export class CostTracker {
    * Load costs from cache/storage
    */
   loadCosts() {
+    const defaultCosts = { history: [], totals: { total: 0, count: 0 }, byProvider: {}, byDate: {} };
     try {
       const cached = getCached(this.storageKey, 'cost-tracker', {});
-      return cached || { history: [], totals: {}, byProvider: {} };
+      if (cached && typeof cached === 'object' && cached.history) {
+        // Normalize the cached shape: totals always carries total/count, and missing byProvider/byDate become {} (a cached value without history falls through to the defaults)
+        return {
+          history: cached.history || [],
+          totals: { total: 0, count: 0, ...cached.totals },
+          byProvider: cached.byProvider || {},
+          byDate: cached.byDate || {}
+        };
+      }
+      return defaultCosts;
     } catch {
-      return { history: [], totals: {}, byProvider: {} };
+      return defaultCosts;
     }
   }
@@ -196,6 +208,106 @@ export class CostTracker {
     };
   }
+  /**
+   * Set budget limit with alert thresholds
+   *
+   * Each call appends a new budget entry to `this.costs.budgets` (creating the
+   * array on first use); existing budgets are not replaced. After saving via
+   * saveCosts(), checkBudgets() runs immediately, so `onWarning`/`onExceeded`
+   * may be invoked synchronously before this method returns.
+   *
+   * NOTE(review): `budgetLimit` is stored without validation, and the budget
+   * entry (including the callback functions) is part of the object handed to
+   * saveCosts() - confirm how the storage layer treats function values.
+   *
+   * @param {number} budgetLimit - Total budget limit (USD)
+   * @param {Object} [options={}] - Budget options
+   * @param {number} [options.warningThreshold=0.8] - Warn once spend reaches this fraction of the limit (0-1)
+   * @param {Function} [options.onWarning] - Callback when warning threshold reached; receives the budget status object
+   * @param {Function} [options.onExceeded] - Callback when budget exceeded; receives the budget status object
+   * @returns {void}
+   */
+  setBudgetLimit(budgetLimit, options = {}) {
+    const { warningThreshold = 0.8, onWarning = null, onExceeded = null } = options;
+    if (!this.costs.budgets) {
+      this.costs.budgets = [];
+    }
+    const budget = {
+      limit: budgetLimit,
+      warningThreshold,
+      onWarning,
+      onExceeded,
+      createdAt: Date.now()
+    };
+    this.costs.budgets.push(budget);
+    this.saveCosts();
+    // Check immediately so spend that is already over the new limit is reported right away
+    this.checkBudgets();
+  }
+  /**
+   * Check all budget limits and trigger alerts
+   *
+   * Compares `getStats().total` with every configured budget and builds one
+   * status object per budget: `{ limit, current, percentage, remaining,
+   * warningThreshold, status }`. `percentage` is the ratio current / limit
+   * (1 means the limit is reached), and `status` is 'exceeded', 'warning' or 'ok'.
+   *
+   * Callbacks: `onExceeded` takes precedence over `onWarning` (at most one of
+   * them runs per budget per call). No "already notified" state is kept here,
+   * so a callback runs on every call while its condition holds. Exceptions
+   * thrown by a callback are caught and discarded.
+   *
+   * NOTE(review): a `limit` of 0 makes `percentage` NaN (when current is 0,
+   * reported as 'ok') or Infinity (reported as 'exceeded').
+   *
+   * @returns {Array} Array of budget status objects (empty when no budgets are configured)
+   */
+  checkBudgets() {
+    if (!this.costs.budgets || this.costs.budgets.length === 0) {
+      return [];
+    }
+    const stats = this.getStats();
+    const current = stats.total;
+    const statuses = [];
+    for (const budget of this.costs.budgets) {
+      const percentage = current / budget.limit;
+      const status = {
+        limit: budget.limit,
+        current,
+        percentage,
+        remaining: Math.max(0, budget.limit - current),
+        warningThreshold: budget.warningThreshold,
+        status: percentage >= 1 ? 'exceeded' : (percentage >= budget.warningThreshold ? 'warning' : 'ok')
+      };
+      statuses.push(status);
+      // Trigger callbacks: exceeded wins over warning, so only one fires per budget
+      if (percentage >= 1 && budget.onExceeded) {
+        try {
+          budget.onExceeded(status);
+        } catch (err) {
+          // Don't fail if callback errors
+        }
+      } else if (percentage >= budget.warningThreshold && budget.onWarning) {
+        try {
+          budget.onWarning(status);
+        } catch (err) {
+          // Don't fail if callback errors
+        }
+      }
+    }
+    return statuses;
+  }
+  /**
+   * Get budget status
+   *
+   * Runs checkBudgets() and summarizes its result. Because checkBudgets()
+   * invokes the budget callbacks, calling this method can trigger
+   * `onWarning`/`onExceeded` as a side effect.
+   *
+   * @returns {Object} Budget status summary: `{ hasBudgets: false }` when no
+   *   budgets are configured, otherwise `{ hasBudgets: true, totalBudgets,
+   *   exceeded, warnings, statuses }` where `exceeded` and `warnings` are
+   *   counts and `statuses` is the array returned by checkBudgets()
+   */
+  getBudgetStatus() {
+    const statuses = this.checkBudgets();
+    if (statuses.length === 0) {
+      return { hasBudgets: false };
+    }
+    const exceeded = statuses.filter(s => s.status === 'exceeded');
+    const warnings = statuses.filter(s => s.status === 'warning');
+    return {
+      hasBudgets: true,
+      totalBudgets: statuses.length,
+      exceeded: exceeded.length,
+      warnings: warnings.length,
+      statuses
+    };
+  }
   /**
    * Reset cost tracking
    */
@@ -255,3 +367,23 @@ export function getCostStats() {
   return getCostTracker().getStats();
 }
+/**
+ * Set budget limit (convenience function)
+ *
+ * Delegates to `setBudgetLimit` on the tracker returned by getCostTracker().
+ *
+ * @param {number} budgetLimit - Budget limit in USD
+ * @param {Object} [options={}] - Budget options, passed through unchanged to the tracker method
+ * @returns {void}
+ */
+export function setBudgetLimit(budgetLimit, options = {}) {
+  const tracker = getCostTracker();
+  tracker.setBudgetLimit(budgetLimit, options);
+}
+/**
+ * Get budget status (convenience function)
+ *
+ * Delegates to `getBudgetStatus` on the tracker returned by getCostTracker().
+ *
+ * @returns {Object} Budget status, exactly as returned by the tracker method
+ */
+export function getBudgetStatus() {
+  return getCostTracker().getBudgetStatus();
+}

package/src/data-extractor.mjs CHANGED Viewed

@@ -12,6 +12,7 @@
 import { createConfig } from './config.mjs';
 import { loadEnv } from './load-env.mjs';
 import { warn } from './logger.mjs';
+import { ValidationError } from './errors.mjs';
 // Load env before LLM utils
 loadEnv();
 // Use shared LLM utility library for text-only calls (optional dependency)
@@ -111,7 +112,16 @@ Return ONLY the JSON object, no other text.`;
       if (jsonMatch) {
         parsed = JSON.parse(jsonMatch[0]);
       } else {
-        throw new Error('Could not extract JSON from response');
+        throw new ValidationError(
+          'Could not extract JSON from response. The LLM response did not contain valid JSON. ' +
+          'This may indicate the model failed to follow the schema format. ' +
+          'Try: 1) Simplifying the schema, 2) Using a more capable model tier, or 3) Adding examples to the prompt.',
+          {
+            responseLength: response?.length || 0,
+            responsePreview: response?.substring(0, 200) || 'No response',
+            schema: schema
+          }
+        );
       }
     }
     if (parsed && validateSchema(parsed, schema)) {
@@ -126,25 +136,44 @@ Return ONLY the JSON object, no other text.`;
 /**
  * Call LLM API (text-only, no vision)
+ * Uses cached wrapper for better performance and cost reduction
  * Uses shared utility with advanced tier for better extraction quality
+ *
+ * Order of attempts:
+ *   1. callLLMCached from ./utils/cached-llm.mjs (tier 'advanced', temperature 0.1,
+ *      maxTokens 1000, useCache true).
+ *   2. If that import or call throws for any reason, callLLM from the optional
+ *      @arclabs561/llm-utils package with the same settings minus useCache.
+ *
+ * NOTE(review): when the fallback also fails, the thrown error always says the
+ * llm-utils package is required, whatever actually went wrong, and the error
+ * from the cached attempt is not included in it.
+ *
+ * @param {string} prompt - Prompt text sent to the model
+ * @param {Object} config - Reads `config.apiKey` and `config.provider` (provider falls back to 'gemini' when falsy)
+ * @returns {Promise<*>} Whatever the underlying LLM call resolves to
+ * @throws {ValidationError} If both the cached call and the uncached fallback fail
  */
 async function callLLMForExtraction(prompt, config) {
   const apiKey = config.apiKey;
   const provider = config.provider || 'gemini';
-  // Try to use optional llm-utils library if available
+  // Use cached LLM wrapper (reduces costs and improves performance)
   try {
-    const llmUtils = await import('@arclabs561/llm-utils');
-    const callLLMUtil = llmUtils.callLLM;
+    const { callLLMCached } = await import('./utils/cached-llm.mjs');
     // Use advanced tier for data extraction (needs higher quality)
-    return await callLLMUtil(prompt, provider, apiKey, {
+    return await callLLMCached(prompt, provider, apiKey, {
       tier: 'advanced', // Data extraction benefits from better models
       temperature: 0.1,
       maxTokens: 1000,
+      useCache: true, // Enable caching by default
     });
   } catch (error) {
-    // Fallback: use local implementation or throw
-    throw new Error(`LLM extraction requires @arclabs561/llm-utils package: ${error.message}`);
+    // Fallback: try uncached version if cached wrapper fails
+    try {
+      const llmUtils = await import('@arclabs561/llm-utils');
+      return await llmUtils.callLLM(prompt, provider, apiKey, {
+        tier: 'advanced',
+        temperature: 0.1,
+        maxTokens: 1000,
+      });
+    } catch (fallbackError) {
+      throw new ValidationError(
+        `LLM extraction requires @arclabs561/llm-utils package. ` +
+        `Install it with: npm install @arclabs561/llm-utils. ` +
+        `Error: ${fallbackError.message}`,
+        {
+          package: '@arclabs561/llm-utils',
+          installationCommand: 'npm install @arclabs561/llm-utils',
+          originalError: fallbackError.message
+        }
+      );
+    }
   }
 }

package/src/dynamic-few-shot.mjs CHANGED Viewed

@@ -8,9 +8,8 @@
  * - ES-KNN: arXiv:2506.05614 (Exemplar Selection KNN using semantic similarity)
  * - KATE: arXiv:2101.06804 (Foundational work on kNN-augmented in-context examples)
  *
- * Note: This implementation uses keyword-based similarity (Jaccard) rather than
- * true semantic embeddings due to npm package constraints. For full ES-KNN,
- * embedding-based cosine similarity would be required.
+ * This implementation supports both keyword-based similarity (Jaccard) and
+ * embedding-based semantic similarity. Embeddings are preferred when available.
  *
  * This module provides dynamic few-shot example selection based on similarity
  * to the evaluation prompt.
@@ -19,20 +18,25 @@
 /**
  * Select few-shot examples based on semantic similarity to prompt
  *
+ * Research: ES-KNN shows embedding-based selection improves performance by 10-20%
+ * over keyword-based selection. This implementation supports both methods.
+ *
  * @param {string} prompt - Evaluation prompt
  * @param {Array<import('./index.mjs').FewShotExample>} examples - Available examples
  * @param {{
  *   maxExamples?: number;
  *   similarityThreshold?: number;
  *   useSemanticMatching?: boolean;
+ *   task?: string;
  * }} [options={}] - Selection options
- * @returns {Array<import('./index.mjs').FewShotExample>} Selected examples
+ * @returns {Promise<Array<import('./index.mjs').FewShotExample>>} Selected examples
  */
-export function selectFewShotExamples(prompt, examples = [], options = {}) {
+export async function selectFewShotExamples(prompt, examples = [], options = {}) {
   const {
     maxExamples = 3,
     similarityThreshold = 0.3,
-    useSemanticMatching = true
+    useSemanticMatching = true,
+    task = 'general'
   } = options;
   // Validate inputs
@@ -50,14 +54,68 @@ export function selectFewShotExamples(prompt, examples = [], options = {}) {
     return examples.slice(0, maxExamples);
   }
-  // Simple keyword-based similarity (for npm package - full semantic matching would require embeddings)
-  const promptKeywords = extractKeywords(prompt.toLowerCase());
+  // UX OPTIMIZATION: Auto-disable embeddings for large example arrays (>100) unless explicitly requested
+  // - Why: Embeddings add ~15ms per example, so 1000 examples = ~15s latency
+  // - User experience: Most users have 10-50 examples, so embeddings are fast and valuable
+  // - Edge case: Large datasets (1000+ examples) should use keyword matching for speed
+  // - Exception: If useEmbeddings is explicitly set to true, respect user preference
+  const exampleCount = examples.length;
+  const shouldUseEmbeddingsForLargeArrays = options.useEmbeddings === true;
+  const autoDisableForLargeArrays = exampleCount > 100 && !shouldUseEmbeddingsForLargeArrays;
+  // Try embeddings first (more accurate) - but skip for large arrays unless explicitly requested
+  if (!autoDisableForLargeArrays) {
+    try {
+      const { instructionSemanticSimilarity, isInstructionEmbeddingsAvailable } = await import('../evaluation/utils/instruction-embeddings.mjs');
+      const { semanticSimilarity, isEmbeddingsAvailable } = await import('../evaluation/utils/semantic-matcher.mjs');
+      const useInstructionEmbeddings = await isInstructionEmbeddingsAvailable();
+      const useGeneralEmbeddings = !useInstructionEmbeddings && await isEmbeddingsAvailable();
+      if (useInstructionEmbeddings || useGeneralEmbeddings) {
+        // Use embeddings for similarity calculation
+        const similarityFn = useInstructionEmbeddings
+          ? (text1, text2) => instructionSemanticSimilarity(text1, text2, task)
+          : (text1, text2) => semanticSimilarity(text1, text2);
+        // Score each example using embeddings
+        const scored = await Promise.all(
+          examples.map(async (example) => {
+            const exampleText = (example.description || '') + ' ' + (example.evaluation || '');
+            const similarity = await similarityFn(prompt, exampleText);
+            return {
+              example,
+              similarity: similarity !== null ? similarity : 0
+            };
+          })
+        );
+        // Sort by similarity and take top N
+        return scored
+          .filter(s => s.similarity >= similarityThreshold)
+          .sort((a, b) => b.similarity - a.similarity)
+          .slice(0, maxExamples)
+          .map(s => s.example);
+      }
+    } catch (error) {
+      // Fall through to keyword matching if embeddings unavailable
+    }
+  }
+  // Fallback: Keyword-based similarity (Jaccard)
+  // For very long prompts, limit keyword extraction to avoid performance issues
+  const maxPromptLength = 10000; // Limit prompt processing to 10KB for performance
+  const processedPrompt = prompt.length > maxPromptLength
+    ? prompt.substring(0, maxPromptLength)
+    : prompt;
+  const promptKeywords = extractKeywords(processedPrompt.toLowerCase());
   // Score each example by keyword overlap
   const scored = examples.map(example => {
-    const exampleKeywords = extractKeywords(
-      (example.description || '') + ' ' + (example.evaluation || '')
-    );
+    const exampleText = (example.description || '') + ' ' + (example.evaluation || '');
+    const exampleKeywords = extractKeywords(exampleText.toLowerCase());
     // Jaccard similarity (intersection over union)
     const intersection = new Set(

package/src/errors.mjs CHANGED Viewed

@@ -2,7 +2,7 @@
  * Custom Error Classes for ai-visual-test
  *
  * Provides standardized error handling across the package.
- * Based on Playwright's error handling patterns and industry best practices.
+ * Based on Playwright's error handling patterns and industry practices.
  *
  * All errors extend AIBrowserTestError for consistent error handling and serialization.
  */
@@ -42,7 +42,11 @@ export class AIBrowserTestError extends Error {
       code: this.code,
       message: this.message,
       details: this.details,
-      stack: this.stack
+      // SECURITY: Stack traces may contain sensitive information
+      // Only include in development mode or when explicitly requested
+      ...(process.env.NODE_ENV === 'development' || process.env.INCLUDE_STACK_TRACES === 'true'
+        ? { stack: this.stack }
+        : {})
     };
   }
 }

package/src/experience-propagation.mjs CHANGED Viewed

@@ -110,10 +110,22 @@ let globalTracker = null;
 /**
  * Get or create global propagation tracker
+ *
+ * @param {Object} [options={}] - Passed to the ExperiencePropagationTracker constructor on the first call. On later calls only `options.enabled` and `options.logLevel` (when not undefined) are applied to the existing tracker; other keys are ignored.
+ * @returns {ExperiencePropagationTracker} Global tracker instance
  */
 export function getPropagationTracker(options = {}) {
   if (!globalTracker) {
     globalTracker = new ExperiencePropagationTracker(options);
+  } else if (Object.keys(options).length > 0) {
+    // Tracker already exists: apply only the reconfigurable fields from options
+    // This allows reconfiguration (though typically tracker is created once)
+    if (options.enabled !== undefined) {
+      globalTracker.enabled = options.enabled;
+    }
+    if (options.logLevel !== undefined) {
+      globalTracker.logLevel = options.logLevel;
+    }
   }
   return globalTracker;
 }

package/src/experience-tracer.mjs CHANGED Viewed

@@ -11,6 +11,7 @@
  */
 import { warn } from './logger.mjs';
+import { ValidationError } from './errors.mjs';
 /**
  * Experience Trace
@@ -133,7 +134,7 @@ export class ExperienceTrace {
    * @param {Record<string, unknown>} [options={}] - Aggregation options
    * @returns {import('./index.mjs').AggregatedTemporalNotes} Aggregated notes
    */
-  aggregateNotes(aggregateTemporalNotes, options = {}) {
+  async aggregateNotes(aggregateTemporalNotes, options = {}) {
     // Extract notes from events and validations
     const eventNotes = this.events
       .filter(e => e.type === 'interaction' || e.type === 'observation')
@@ -157,7 +158,7 @@ export class ExperienceTrace {
     const notes = [...eventNotes, ...validationNotes].sort((a, b) => a.timestamp - b.timestamp);
-    this.aggregatedNotes = aggregateTemporalNotes(notes, options);
+    this.aggregatedNotes = await aggregateTemporalNotes(notes, options);
     return this.aggregatedNotes;
   }
@@ -296,7 +297,15 @@ export class ExperienceTracerManager {
   async metaEvaluateTrace(sessionId, validateScreenshot) {
     const trace = this.getTrace(sessionId);
     if (!trace) {
-      throw new Error(`Trace not found: ${sessionId}`);
+      throw new ValidationError(
+        `Trace not found for session: ${sessionId}. ` +
+        `Use startTrace() to create a new trace, or listTraces() to see all available traces.`,
+        {
+          sessionId,
+          availableSessions: Object.keys(this.traces),
+          function: 'metaEvaluateTrace'
+        }
+      );
     }
     const evaluation = {