npm - @arclabs561/ai-visual-test - Versions diffs - 0.5.1 → 0.7.3 - Mend

@arclabs561/ai-visual-test 0.5.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

package/CHANGELOG.md +102 -11
package/DEPLOYMENT.md +225 -9
package/README.md +71 -80
package/index.d.ts +862 -3
package/package.json +10 -51
package/src/batch-optimizer.mjs +39 -0
package/src/cache.mjs +241 -16
package/src/config.mjs +33 -91
package/src/constants.mjs +54 -0
package/src/convenience.mjs +113 -10
package/src/cost-optimization.mjs +1 -0
package/src/cost-tracker.mjs +134 -2
package/src/data-extractor.mjs +36 -7
package/src/dynamic-few-shot.mjs +69 -11
package/src/errors.mjs +6 -2
package/src/experience-propagation.mjs +12 -0
package/src/experience-tracer.mjs +12 -3
package/src/game-player.mjs +222 -43
package/src/graceful-shutdown.mjs +126 -0
package/src/helpers/playwright.mjs +22 -8
package/src/human-validation-manager.mjs +99 -2
package/src/index.mjs +48 -3
package/src/integrations/playwright.mjs +140 -0
package/src/judge.mjs +697 -24
package/src/load-env.mjs +2 -1
package/src/logger.mjs +31 -3
package/src/model-tier-selector.mjs +1 -221
package/src/natural-language-specs.mjs +31 -3
package/src/persona-enhanced.mjs +4 -2
package/src/persona-experience.mjs +1 -1
package/src/pricing.mjs +28 -0
package/src/prompt-composer.mjs +162 -5
package/src/provider-data.mjs +115 -0
package/src/render-change-detector.mjs +5 -0
package/src/research-enhanced-validation.mjs +7 -5
package/src/retry.mjs +21 -7
package/src/rubrics.mjs +4 -0
package/src/safe-logger.mjs +71 -0
package/src/session-cost-tracker.mjs +320 -0
package/src/smart-validator.mjs +8 -8
package/src/spec-templates.mjs +52 -6
package/src/startup-validation.mjs +127 -0
package/src/temporal-adaptive.mjs +2 -2
package/src/temporal-decision-manager.mjs +1 -271
package/src/temporal-logic.mjs +104 -0
package/src/temporal-note-pruner.mjs +119 -0
package/src/temporal-preprocessor.mjs +1 -543
package/src/temporal.mjs +681 -79
package/src/utils/action-hallucination-detector.mjs +301 -0
package/src/utils/baseline-validator.mjs +82 -0
package/src/utils/cache-stats.mjs +104 -0
package/src/utils/cached-llm.mjs +164 -0
package/src/utils/capability-stratifier.mjs +108 -0
package/src/utils/counterfactual-tester.mjs +83 -0
package/src/utils/error-recovery.mjs +117 -0
package/src/utils/explainability-scorer.mjs +119 -0
package/src/utils/exploratory-automation.mjs +131 -0
package/src/utils/index.mjs +10 -0
package/src/utils/intent-recognizer.mjs +201 -0
package/src/utils/log-sanitizer.mjs +165 -0
package/src/utils/path-validator.mjs +88 -0
package/src/utils/performance-logger.mjs +316 -0
package/src/utils/performance-measurement.mjs +280 -0
package/src/utils/prompt-sanitizer.mjs +213 -0
package/src/utils/rate-limiter.mjs +144 -0
package/src/validation-framework.mjs +24 -20
package/src/validation-result-normalizer.mjs +27 -1
package/src/validation.mjs +75 -25
package/src/validators/accessibility-validator.mjs +144 -0
package/src/validators/hybrid-validator.mjs +48 -4
package/api/health.js +0 -34
package/api/validate.js +0 -252
package/public/index.html +0 -149
package/vercel.json +0 -27

package/src/utils/performance-logger.mjs ADDED Viewed

@@ -0,0 +1,316 @@
+/**
+ * Performance Logger
+ *
+ * Provides structured logging for critical performance metrics:
+ * - API call performance (latency, retries, errors, costs)
+ * - Cache effectiveness (hit rates, eviction patterns)
+ * - Temporal decision reasoning (why prompts triggered/skipped)
+ * - Batch optimizer metrics (queue depth, timeouts, rejections)
+ * - Error patterns (frequency, types, recovery)
+ *
+ * Weighted logging: More detail for critical paths (API calls, cache misses, errors)
+ */
+import { log, warn, error, isDebugEnabled } from '../logger.mjs';
+/**
+ * Log API call performance
+ *
+ * @param {Object} params - Performance data
+ * @param {string} params.provider - Provider name (gemini, openai, claude, groq)
+ * @param {number} params.latency - Response time in ms
+ * @param {number} params.retries - Number of retries
+ * @param {number} params.cost - Estimated cost
+ * @param {number} params.inputTokens - Input tokens
+ * @param {number} params.outputTokens - Output tokens
+ * @param {boolean} params.success - Whether call succeeded
+ * @param {Error} [params.error] - Error if failed
+ * @param {string} [params.testName] - Test name for context
+ */
+export function logAPICallPerformance(params) {
+  const {
+    provider,
+    latency,
+    retries = 0,
+    cost = null,
+    inputTokens = 0,
+    outputTokens = 0,
+    success = true,
+    error: err = null,
+    testName = 'unknown'
+  } = params;
+  // Always log errors (critical visibility)
+  if (!success && err) {
+    error(`[API] ${provider} call failed`, {
+      provider,
+      latency,
+      retries,
+      error: err.message,
+      testName,
+      stack: err.stack
+    });
+    return;
+  }
+  // Log retries (important for debugging)
+  if (retries > 0) {
+    warn(`[API] ${provider} call succeeded after ${retries} retries`, {
+      provider,
+      latency,
+      retries,
+      cost,
+      testName
+    });
+  }
+  // Detailed logging in debug mode (weighted: always log for critical paths)
+  if (isDebugEnabled() || latency > 5000 || retries > 0) {
+    log(`[API] ${provider} call`, {
+      provider,
+      latency: `${latency}ms`,
+      retries,
+      cost: cost ? `$${cost.toFixed(6)}` : null,
+      tokens: `${inputTokens} in, ${outputTokens} out`,
+      testName,
+      performance: latency < 1000 ? 'fast' : latency < 3000 ? 'normal' : 'slow'
+    });
+  }
+}
+/**
+ * Log cache operation
+ *
+ * @param {Object} params - Cache operation data
+ * @param {string} params.operation - Operation type (hit, miss, set, evict, expire)
+ * @param {boolean} params.hit - Whether it was a hit
+ * @param {number} [params.latency] - Lookup latency in ms
+ * @param {number} [params.cacheSize] - Current cache size
+ * @param {number} [params.maxSize] - Max cache size
+ * @param {string} [params.reason] - Reason for eviction/expiration
+ */
+export function logCacheOperation(params) {
+  const {
+    operation,
+    hit = false,
+    latency = null,
+    cacheSize = null,
+    maxSize = null,
+    reason = null
+  } = params;
+  // Always log evictions and expirations (important for cache health)
+  if (operation === 'evict' || operation === 'expire') {
+    warn(`[Cache] ${operation}`, {
+      operation,
+      cacheSize,
+      maxSize,
+      reason,
+      utilization: maxSize ? `${((cacheSize / maxSize) * 100).toFixed(1)}%` : null
+    });
+    return;
+  }
+  // Log misses in debug mode (weighted: cache misses are important)
+  if (operation === 'miss' && isDebugEnabled()) {
+    log(`[Cache] miss`, {
+      operation,
+      latency: latency ? `${latency}ms` : null,
+      cacheSize,
+      maxSize
+    });
+  }
+  // Log hits only in verbose debug mode (less critical)
+  if (operation === 'hit' && isDebugEnabled()) {
+    log(`[Cache] hit`, {
+      operation,
+      latency: latency ? `${latency}ms` : null,
+      cacheSize
+    });
+  }
+}
+/**
+ * Log temporal decision reasoning
+ *
+ * @param {Object} params - Decision data
+ * @param {boolean} params.shouldPrompt - Whether to prompt
+ * @param {string} params.reason - Reason for decision
+ * @param {string} params.urgency - Urgency level (low, medium, high)
+ * @param {number} [params.coherence] - Temporal coherence score
+ * @param {number} [params.stateChange] - State change magnitude
+ * @param {number} [params.noteCount] - Number of temporal notes
+ * @param {boolean} [params.isDecisionPoint] - Whether this is a decision point
+ * @param {boolean} [params.hasUserAction] - Whether user action occurred
+ */
+export function logTemporalDecision(params) {
+  const {
+    shouldPrompt,
+    reason,
+    urgency,
+    coherence = null,
+    stateChange = null,
+    noteCount = null,
+    isDecisionPoint = false,
+    hasUserAction = false
+  } = params;
+  // Always log high-urgency decisions (critical visibility)
+  if (urgency === 'high') {
+    log(`[Temporal] Decision: ${shouldPrompt ? 'PROMPT' : 'WAIT'} (${urgency})`, {
+      shouldPrompt,
+      reason,
+      urgency,
+      coherence,
+      stateChange,
+      noteCount,
+      isDecisionPoint,
+      hasUserAction
+    });
+    return;
+  }
+  // Log medium-urgency in debug mode
+  if (urgency === 'medium' && isDebugEnabled()) {
+    log(`[Temporal] Decision: ${shouldPrompt ? 'PROMPT' : 'WAIT'} (${urgency})`, {
+      shouldPrompt,
+      reason,
+      urgency,
+      coherence,
+      stateChange,
+      noteCount
+    });
+  }
+  // Log low-urgency only in verbose debug mode
+  if (urgency === 'low' && isDebugEnabled()) {
+    log(`[Temporal] Decision: ${shouldPrompt ? 'PROMPT' : 'WAIT'} (${urgency})`, {
+      shouldPrompt,
+      reason,
+      urgency
+    });
+  }
+}
+/**
+ * Log batch optimizer metrics
+ *
+ * @param {Object} params - Batch optimizer data
+ * @param {string} params.event - Event type (queue, process, timeout, reject)
+ * @param {number} [params.queueDepth] - Current queue depth
+ * @param {number} [params.maxQueueSize] - Max queue size
+ * @param {number} [params.activeRequests] - Active concurrent requests
+ * @param {number} [params.maxConcurrency] - Max concurrency
+ * @param {number} [params.waitTime] - Wait time in ms
+ * @param {string} [params.reason] - Reason for timeout/rejection
+ */
+export function logBatchOptimizer(params) {
+  const {
+    event,
+    queueDepth = null,
+    maxQueueSize = null,
+    activeRequests = null,
+    maxConcurrency = null,
+    waitTime = null,
+    reason = null
+  } = params;
+  // Always log rejections and timeouts (critical visibility)
+  if (event === 'reject' || event === 'timeout') {
+    warn(`[BatchOptimizer] ${event}`, {
+      event,
+      queueDepth,
+      maxQueueSize,
+      activeRequests,
+      maxConcurrency,
+      waitTime: waitTime ? `${waitTime}ms` : null,
+      reason,
+      utilization: maxQueueSize ? `${((queueDepth / maxQueueSize) * 100).toFixed(1)}%` : null
+    });
+    return;
+  }
+  // Log queue depth when high (important for monitoring)
+  if (event === 'queue' && queueDepth && maxQueueSize && queueDepth > maxQueueSize * 0.8) {
+    warn(`[BatchOptimizer] High queue depth`, {
+      event,
+      queueDepth,
+      maxQueueSize,
+      utilization: `${((queueDepth / maxQueueSize) * 100).toFixed(1)}%`
+    });
+  }
+  // Log processing in debug mode
+  if (event === 'process' && isDebugEnabled()) {
+    log(`[BatchOptimizer] ${event}`, {
+      event,
+      queueDepth,
+      activeRequests,
+      maxConcurrency,
+      waitTime: waitTime ? `${waitTime}ms` : null
+    });
+  }
+}
+/**
+ * Log error pattern
+ *
+ * @param {Object} params - Error data
+ * @param {Error} params.error - Error object
+ * @param {string} params.context - Context where error occurred
+ * @param {string} [params.recovery] - Recovery strategy attempted
+ * @param {boolean} [params.recovered] - Whether recovery succeeded
+ * @param {number} [params.retryCount] - Number of retries
+ */
+export function logErrorPattern(params) {
+  const {
+    error: err,
+    context,
+    recovery = null,
+    recovered = false,
+    retryCount = 0
+  } = params;
+  // Always log errors (critical visibility)
+  error(`[Error] ${context}`, {
+    context,
+    error: err.message,
+    errorType: err.constructor.name,
+    recovery,
+    recovered,
+    retryCount,
+    stack: err.stack
+  });
+}
+/**
+ * Log cache statistics summary
+ *
+ * @param {Object} params - Cache statistics
+ * @param {number} params.hits - Number of cache hits
+ * @param {number} params.misses - Number of cache misses
+ * @param {number} params.hitRate - Hit rate percentage
+ * @param {number} params.avgLatency - Average lookup latency in ms
+ * @param {number} params.savings - Estimated time/cost savings
+ */
+export function logCacheStats(params) {
+  const {
+    hits,
+    misses,
+    hitRate,
+    avgLatency,
+    savings = null
+  } = params;
+  // Always log cache statistics (important for optimization)
+  log(`[Cache] Statistics`, {
+    hits,
+    misses,
+    hitRate: `${hitRate.toFixed(1)}%`,
+    avgLatency: `${avgLatency.toFixed(2)}ms`,
+    savings: savings ? `${savings}ms saved` : null,
+    effectiveness: hitRate > 50 ? 'good' : hitRate > 30 ? 'moderate' : 'low'
+  });
+}

package/src/utils/performance-measurement.mjs ADDED Viewed

@@ -0,0 +1,280 @@
+/**
+ * Performance Measurement Utilities
+ *
+ * Provides utilities for measuring and tracking performance metrics
+ * across the system. Useful for identifying bottlenecks and optimizing.
+ */
+import { log, warn } from '../logger.mjs';
+// Use performance.now() if available (Node.js 16.5+), otherwise use Date.now()
+// Both sources return milliseconds. performance.now() is a monotonic,
+// sub-millisecond clock; Date.now() is wall-clock time in whole milliseconds.
+// The clock source is chosen once, when this module is evaluated.
+const getHighResTime = typeof performance !== 'undefined' && performance.now
+  ? () => performance.now()
+  : () => Date.now();
+/**
+ * Performance measurement class
+ */
+export class PerformanceMeasurement {
+  constructor(name, options = {}) {
+    this.name = name;
+    this.startTime = null;
+    this.endTime = null;
+    this.marks = [];
+    this.metadata = options.metadata || {};
+    this.autoLog = options.autoLog !== false; // Default true
+  }
+  /**
+   * Start measurement
+   */
+  start() {
+    this.startTime = getHighResTime();
+    this.marks = [];
+    return this;
+  }
+  /**
+   * Mark a checkpoint
+   *
+   * @param {string} label - Checkpoint label
+   * @param {Object} [metadata={}] - Additional metadata
+   */
+  mark(label, metadata = {}) {
+    const now = getHighResTime();
+    const elapsed = this.startTime ? now - this.startTime : 0;
+    this.marks.push({
+      label,
+      timestamp: now,
+      elapsed,
+      metadata
+    });
+    return this;
+  }
+  /**
+   * End measurement
+   *
+   * @param {Object} [metadata={}] - Final metadata
+   * @returns {Object} Measurement result
+   */
+  end(metadata = {}) {
+    this.endTime = getHighResTime();
+    const duration = this.startTime ? this.endTime - this.startTime : 0;
+    const result = {
+      name: this.name,
+      duration,
+      durationMs: duration.toFixed(2),
+      marks: this.marks,
+      metadata: { ...this.metadata, ...metadata }
+    };
+    if (this.autoLog) {
+      log(`[Performance] ${this.name}: ${duration.toFixed(2)}ms`);
+      if (this.marks.length > 0) {
+        this.marks.forEach(mark => {
+          log(`  - ${mark.label}: ${mark.elapsed.toFixed(2)}ms`);
+        });
+      }
+    }
+    return result;
+  }
+  /**
+   * Get current elapsed time without ending
+   */
+  getElapsed() {
+    if (!this.startTime) return 0;
+    return getHighResTime() - this.startTime;
+  }
+}
+/**
+ * Measure async function execution
+ *
+ * @param {string} name - Measurement name
+ * @param {Function} fn - Async function to measure
+ * @param {Object} [options={}] - Measurement options
+ * @returns {Promise<*>} Function result
+ */
+export async function measureAsync(name, fn, options = {}) {
+  const measurement = new PerformanceMeasurement(name, options);
+  measurement.start();
+  try {
+    const result = await fn();
+    const measurementResult = measurement.end({ success: true });
+    return { result, measurement: measurementResult };
+  } catch (error) {
+    measurement.end({ success: false, error: error.message });
+    throw error;
+  }
+}
+/**
+ * Measure sync function execution
+ *
+ * @param {string} name - Measurement name
+ * @param {Function} fn - Sync function to measure
+ * @param {Object} [options={}] - Measurement options
+ * @returns {*} Function result
+ */
+export function measureSync(name, fn, options = {}) {
+  const measurement = new PerformanceMeasurement(name, options);
+  measurement.start();
+  try {
+    const result = fn();
+    const measurementResult = measurement.end({ success: true });
+    return { result, measurement: measurementResult };
+  } catch (error) {
+    measurement.end({ success: false, error: error.message });
+    throw error;
+  }
+}
+/**
+ * Performance profiler for tracking multiple operations
+ */
+export class PerformanceProfiler {
+  constructor() {
+    this.measurements = [];
+    this.active = new Map();
+  }
+  /**
+   * Start profiling an operation
+   *
+   * @param {string} name - Operation name
+   * @param {Object} [metadata={}] - Metadata
+   * @returns {string} Profile ID
+   */
+  start(name, metadata = {}) {
+    const id = `${name}-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
+    const measurement = new PerformanceMeasurement(name, { metadata, autoLog: false });
+    measurement.start();
+    this.active.set(id, measurement);
+    return id;
+  }
+  /**
+   * End profiling an operation
+   *
+   * @param {string} id - Profile ID
+   * @param {Object} [metadata={}] - Final metadata
+   * @returns {Object} Measurement result
+   */
+  end(id, metadata = {}) {
+    const measurement = this.active.get(id);
+    if (!measurement) {
+      warn(`[PerformanceProfiler] No active measurement found for ID: ${id}`);
+      return null;
+    }
+    this.active.delete(id);
+    const result = measurement.end(metadata);
+    this.measurements.push(result);
+    return result;
+  }
+  /**
+   * Get summary statistics
+   *
+   * @returns {Object} Summary statistics
+   */
+  getSummary() {
+    if (this.measurements.length === 0) {
+      return { count: 0 };
+    }
+    const durations = this.measurements.map(m => m.duration);
+    const total = durations.reduce((sum, d) => sum + d, 0);
+    const average = total / durations.length;
+    const min = Math.min(...durations);
+    const max = Math.max(...durations);
+    // Calculate percentiles
+    const sorted = [...durations].sort((a, b) => a - b);
+    const p50 = sorted[Math.floor(sorted.length * 0.5)];
+    const p95 = sorted[Math.floor(sorted.length * 0.95)];
+    const p99 = sorted[Math.floor(sorted.length * 0.99)];
+    // Group by name
+    const byName = {};
+    this.measurements.forEach(m => {
+      if (!byName[m.name]) {
+        byName[m.name] = { count: 0, total: 0, durations: [] };
+      }
+      byName[m.name].count += 1;
+      byName[m.name].total += m.duration;
+      byName[m.name].durations.push(m.duration);
+    });
+    // Calculate averages by name
+    Object.keys(byName).forEach(name => {
+      const data = byName[name];
+      data.average = data.total / data.count;
+      data.min = Math.min(...data.durations);
+      data.max = Math.max(...data.durations);
+      delete data.durations; // Remove raw durations
+    });
+    return {
+      count: this.measurements.length,
+      total: total.toFixed(2),
+      average: average.toFixed(2),
+      min: min.toFixed(2),
+      max: max.toFixed(2),
+      p50: p50.toFixed(2),
+      p95: p95.toFixed(2),
+      p99: p99.toFixed(2),
+      byName
+    };
+  }
+  /**
+   * Reset profiler
+   */
+  reset() {
+    this.measurements = [];
+    this.active.clear();
+  }
+  /**
+   * Clear measurements (alias for reset)
+   */
+  clear() {
+    this.reset();
+  }
+  /**
+   * Export measurements
+   *
+   * @returns {Array} All measurements
+   */
+  export() {
+    return [...this.measurements];
+  }
+}
+/**
+ * Global profiler instance
+ */
+let globalProfiler = null;
+/**
+ * Get global profiler instance
+ *
+ * @returns {PerformanceProfiler} Profiler instance
+ */
+export function getProfiler() {
+  if (!globalProfiler) {
+    globalProfiler = new PerformanceProfiler();
+  }
+  return globalProfiler;
+}