npm - @arclabs561/ai-visual-test - Versions diffs - 0.5.1 - Mend

@arclabs561/ai-visual-test 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

package/.secretsignore.example +20 -0
package/CHANGELOG.md +360 -0
package/CONTRIBUTING.md +63 -0
package/DEPLOYMENT.md +80 -0
package/LICENSE +22 -0
package/README.md +142 -0
package/SECURITY.md +108 -0
package/api/health.js +34 -0
package/api/validate.js +252 -0
package/index.d.ts +1221 -0
package/package.json +112 -0
package/public/index.html +149 -0
package/src/batch-optimizer.mjs +451 -0
package/src/bias-detector.mjs +370 -0
package/src/bias-mitigation.mjs +233 -0
package/src/cache.mjs +433 -0
package/src/config.mjs +268 -0
package/src/constants.mjs +80 -0
package/src/context-compressor.mjs +350 -0
package/src/convenience.mjs +617 -0
package/src/cost-tracker.mjs +257 -0
package/src/cross-modal-consistency.mjs +170 -0
package/src/data-extractor.mjs +232 -0
package/src/dynamic-few-shot.mjs +140 -0
package/src/dynamic-prompts.mjs +361 -0
package/src/ensemble/index.mjs +53 -0
package/src/ensemble-judge.mjs +366 -0
package/src/error-handler.mjs +67 -0
package/src/errors.mjs +167 -0
package/src/experience-propagation.mjs +128 -0
package/src/experience-tracer.mjs +487 -0
package/src/explanation-manager.mjs +299 -0
package/src/feedback-aggregator.mjs +248 -0
package/src/game-goal-prompts.mjs +478 -0
package/src/game-player.mjs +548 -0
package/src/hallucination-detector.mjs +155 -0
package/src/helpers/playwright.mjs +80 -0
package/src/human-validation-manager.mjs +516 -0
package/src/index.mjs +364 -0
package/src/judge.mjs +929 -0
package/src/latency-aware-batch-optimizer.mjs +192 -0
package/src/load-env.mjs +159 -0
package/src/logger.mjs +55 -0
package/src/metrics.mjs +187 -0
package/src/model-tier-selector.mjs +221 -0
package/src/multi-modal/index.mjs +36 -0
package/src/multi-modal-fusion.mjs +190 -0
package/src/multi-modal.mjs +524 -0
package/src/natural-language-specs.mjs +1071 -0
package/src/pair-comparison.mjs +277 -0
package/src/persona/index.mjs +42 -0
package/src/persona-enhanced.mjs +200 -0
package/src/persona-experience.mjs +572 -0
package/src/position-counterbalance.mjs +140 -0
package/src/prompt-composer.mjs +375 -0
package/src/render-change-detector.mjs +583 -0
package/src/research-enhanced-validation.mjs +436 -0
package/src/retry.mjs +152 -0
package/src/rubrics.mjs +231 -0
package/src/score-tracker.mjs +277 -0
package/src/smart-validator.mjs +447 -0
package/src/spec-config.mjs +106 -0
package/src/spec-templates.mjs +347 -0
package/src/specs/index.mjs +38 -0
package/src/temporal/index.mjs +102 -0
package/src/temporal-adaptive.mjs +163 -0
package/src/temporal-batch-optimizer.mjs +222 -0
package/src/temporal-constants.mjs +69 -0
package/src/temporal-context.mjs +49 -0
package/src/temporal-decision-manager.mjs +271 -0
package/src/temporal-decision.mjs +669 -0
package/src/temporal-errors.mjs +58 -0
package/src/temporal-note-pruner.mjs +173 -0
package/src/temporal-preprocessor.mjs +543 -0
package/src/temporal-prompt-formatter.mjs +219 -0
package/src/temporal-validation.mjs +159 -0
package/src/temporal.mjs +415 -0
package/src/type-guards.mjs +311 -0
package/src/uncertainty-reducer.mjs +470 -0
package/src/utils/index.mjs +175 -0
package/src/validation-framework.mjs +321 -0
package/src/validation-result-normalizer.mjs +64 -0
package/src/validation.mjs +243 -0
package/src/validators/accessibility-programmatic.mjs +345 -0
package/src/validators/accessibility-validator.mjs +223 -0
package/src/validators/batch-validator.mjs +143 -0
package/src/validators/hybrid-validator.mjs +268 -0
package/src/validators/index.mjs +34 -0
package/src/validators/prompt-builder.mjs +218 -0
package/src/validators/rubric.mjs +85 -0
package/src/validators/state-programmatic.mjs +260 -0
package/src/validators/state-validator.mjs +291 -0
package/vercel.json +27 -0

package/src/constants.mjs ADDED Viewed

@@ -0,0 +1,80 @@
+/**
+ * Shared Constants
+ *
+ * Centralized constants for magic numbers used throughout the codebase.
+ * All values are documented with their purpose and rationale.
+ */
/**
 * Cache Configuration
 *
 * The object is frozen: it is shared by every importer of this module, so an
 * accidental write in one place must not change the limits seen elsewhere.
 */
export const CACHE_CONSTANTS = Object.freeze({
  /** Maximum age of cache entries in milliseconds (7 days) */
  MAX_CACHE_AGE_MS: 7 * 24 * 60 * 60 * 1000,
  /** Maximum number of cache entries before LRU eviction */
  MAX_CACHE_SIZE: 1000,
  /** Maximum cache file size in bytes (100MB) */
  MAX_CACHE_SIZE_BYTES: 100 * 1024 * 1024
});
/**
 * Temporal Aggregation Configuration
 *
 * The object is frozen: it is shared by every importer of this module, so an
 * accidental write in one place must not change the defaults seen elsewhere.
 */
export const TEMPORAL_CONSTANTS = Object.freeze({
  /** Default window size for temporal aggregation in milliseconds (10 seconds) */
  DEFAULT_WINDOW_SIZE_MS: 10 * 1000,
  /** Default exponential decay factor for older notes (0.9 = 10% decay per window) */
  DEFAULT_DECAY_FACTOR: 0.9,
  /** Default coherence threshold for temporal consistency checks (0.7 = 70% coherence required) */
  DEFAULT_COHERENCE_THRESHOLD: 0.7
});
/**
 * API Configuration
 *
 * The object is frozen: it is shared by every importer of this module, so an
 * accidental write in one place must not change the defaults seen elsewhere.
 */
export const API_CONSTANTS = Object.freeze({
  /** Default timeout for API calls in milliseconds (30 seconds) */
  DEFAULT_TIMEOUT_MS: 30 * 1000,
  /** Default maximum concurrency for API calls */
  DEFAULT_MAX_CONCURRENCY: 5
});
/**
 * Batch Optimizer Configuration
 *
 * The object is frozen: it is shared by every importer of this module, so an
 * accidental write in one place must not change the limits seen elsewhere.
 */
export const BATCH_OPTIMIZER_CONSTANTS = Object.freeze({
  /** Maximum queue size before rejecting new requests (prevents memory leaks) */
  MAX_QUEUE_SIZE: 1000,
  /** Request timeout in milliseconds (30 seconds) */
  REQUEST_TIMEOUT_MS: 30 * 1000
});
/**
 * Uncertainty Reduction Configuration
 *
 * The object is frozen: it is shared by every importer of this module, so an
 * accidental write in one place must not change the thresholds seen elsewhere.
 */
export const UNCERTAINTY_CONSTANTS = Object.freeze({
  /** Low score threshold for edge case detection (bottom 30% of 0-10 scale) */
  LOW_SCORE_THRESHOLD: 3,
  /** High score threshold for edge case detection (top 10% of 0-10 scale) */
  HIGH_SCORE_THRESHOLD: 9,
  /** High uncertainty threshold for triggering self-consistency (0.3 = 30% uncertainty) */
  HIGH_UNCERTAINTY_THRESHOLD: 0.3,
  /** Issue count threshold for over-detection risk (5+ issues might indicate hallucination) */
  OVER_DETECTION_ISSUE_COUNT: 5,
  /** Self-consistency N for Tier 1 scenarios (expert, medical, blocking issues) */
  TIER1_SELF_CONSISTENCY_N: 5,
  /** Self-consistency N for edge cases (Tier 2) */
  EDGE_CASE_SELF_CONSISTENCY_N: 3
});

package/src/context-compressor.mjs ADDED Viewed

@@ -0,0 +1,350 @@
+/**
+ * Context Compressor
+ *
+ * Compresses historical context to reduce token usage while maintaining accuracy.
+ *
+ * General-purpose utility - no domain-specific logic.
+ */
/**
 * Compress context by aggregating notes and extracting key insights
 *
 * Notes are ordered newest-first, "key events" are identified (step text
 * containing 'bug', 'error' or 'critical', CRITICAL severity, or a reflection
 * that carries a score), and a subset is selected according to the strategy:
 *
 * - 'temporal' (default): the newest notes, plus the newest key events that
 *   are not already among them (roughly a 70/30 split of `maxNotes`).
 * - 'semantic': one representative per step prefix, plus key events.
 * - 'importance': the highest-scoring notes.
 *
 * An unrecognized strategy falls back to 'temporal' rather than silently
 * producing an empty result.
 *
 * @param {import('./index.mjs').TemporalNote[]} notes - Array of temporal notes to compress
 * @param {{
 *   maxTokens?: number;
 *   maxNotes?: number;
 *   includeRecent?: boolean;
 *   includeKeyEvents?: boolean;
 *   aggregationStrategy?: 'temporal' | 'semantic' | 'importance';
 * }} [options={}] - Compression options. `maxTokens` is accepted for
 *   compatibility but is not enforced; `maxNotes` is the effective limit.
 *   `includeRecent` / `includeKeyEvents` only affect the 'temporal' strategy.
 * @returns {{
 *   compressed: import('./index.mjs').TemporalNote[];
 *   summary: string;
 *   tokenEstimate: number;
 *   compressionRatio: number;
 *   originalCount: number;
 *   compressedCount: number;
 * }} Selected notes plus a textual summary and size statistics
 */
export function compressContext(notes, options = {}) {
  const {
    maxNotes = 10, // Maximum notes to include
    includeRecent = true, // Always include most recent notes
    includeKeyEvents = true, // Always include key events (bugs, state changes)
    aggregationStrategy = 'temporal' // 'temporal', 'semantic', 'importance'
  } = options;
  if (!notes || notes.length === 0) {
    // Same shape as the non-empty result so callers can read every field unconditionally
    return {
      compressed: [],
      summary: 'No notes available',
      tokenEstimate: 0,
      compressionRatio: 1.0,
      originalCount: 0,
      compressedCount: 0
    };
  }
  // Negative, fractional or non-numeric limits would otherwise produce surprising slices
  const noteLimit = Math.max(0, Math.floor(Number(maxNotes) || 0));
  // Sort a copy by timestamp (most recent first); the caller's array is left untouched
  const sortedNotes = [...notes].sort((a, b) => (b.timestamp || 0) - (a.timestamp || 0));
  // Extract key events (bugs, state changes, critical observations)
  const keyEvents = sortedNotes.filter(note => {
    const step = typeof note.step === 'string' ? note.step : '';
    return (
      step.includes('bug') ||
      step.includes('error') ||
      step.includes('critical') ||
      note.severity === 'CRITICAL' ||
      note.reflection?.score !== undefined
    );
  });
  // Select notes based on strategy
  let selectedNotes;
  if (aggregationStrategy === 'semantic') {
    // Semantic: Group by similarity and select representatives
    selectedNotes = selectSemanticRepresentatives(sortedNotes, noteLimit, keyEvents);
  } else if (aggregationStrategy === 'importance') {
    // Importance: Score notes by importance and select top
    selectedNotes = selectByImportance(sortedNotes, noteLimit, keyEvents);
  } else {
    // Temporal (also the fallback for unknown strategies): most recent + key events
    selectedNotes = selectTemporalNotes(sortedNotes, keyEvents, noteLimit, includeRecent, includeKeyEvents);
  }
  // Generate summary from selected notes
  const summary = generateSummary(selectedNotes, sortedNotes);
  // Estimate token count
  const tokenEstimate = estimateTokens(selectedNotes, summary);
  const originalTokenEstimate = estimateTokens(sortedNotes);
  const compressionRatio = originalTokenEstimate > 0 ? tokenEstimate / originalTokenEstimate : 1.0;
  return {
    compressed: selectedNotes,
    summary,
    tokenEstimate,
    compressionRatio,
    originalCount: notes.length,
    compressedCount: selectedNotes.length
  };
}
/**
 * Temporal selection: newest notes first, then the newest key events that were
 * not already picked. The two quotas always add up to `limit`, so a small
 * limit (e.g. 1) still yields a note instead of rounding both quotas to zero.
 */
function selectTemporalNotes(sortedNotes, keyEvents, limit, includeRecent, includeKeyEvents) {
  const keyQuota = Math.floor(limit * 0.3);
  // Infinity - Infinity would be NaN, so an unbounded limit keeps an unbounded recent quota
  const recentQuota = Number.isFinite(limit) ? limit - keyQuota : limit;
  const recentNotes = includeRecent ? sortedNotes.slice(0, recentQuota) : [];
  const alreadyPicked = new Set(recentNotes);
  const extraKeyEvents = includeKeyEvents
    ? keyEvents.filter(note => !alreadyPicked.has(note)).slice(0, keyQuota)
    : [];
  // Deduplicate by object identity: distinct notes that merely share a step
  // and timestamp must not be collapsed into one.
  return [...new Set([...recentNotes, ...extraKeyEvents])].slice(0, limit);
}
/**
 * Select semantic representatives (group similar notes, pick one from each group)
 *
 * Notes are grouped by the part of `step` before the first underscore
 * ('other' when there is no step). The newest note of each group is kept,
 * key events are appended, duplicates are removed and the result is capped.
 *
 * @param {Array<object>} notes - Candidate notes
 * @param {number} maxNotes - Maximum number of notes to return
 * @param {Array<object>} keyEvents - Notes that should be included in addition to the representatives
 * @returns {Array<object>} Representatives (in first-seen group order) followed by key events
 */
function selectSemanticRepresentatives(notes, maxNotes, keyEvents) {
  // Simple semantic grouping by step type; the Map keeps first-seen group order
  const newestByGroup = new Map();
  for (const note of notes) {
    const groupKey = note.step?.split('_')[0] || 'other';
    const current = newestByGroup.get(groupKey);
    // Strict comparison keeps the earliest-listed note when timestamps tie
    if (current === undefined || (note.timestamp || 0) > (current.timestamp || 0)) {
      newestByGroup.set(groupKey, note);
    }
  }
  // Always include key events. Deduplicate by object identity: two distinct
  // notes that share a step and timestamp (or both lack a timestamp) are
  // different observations and must both survive.
  const unique = new Set([...newestByGroup.values(), ...keyEvents]);
  return [...unique].slice(0, maxNotes);
}
/**
 * Select notes by importance score
 *
 * Score components:
 * - +10 when the note is one of `keyEvents`
 * - 0..10 recency bonus that decays linearly over the 10 seconds before "now"
 * - +5 for CRITICAL severity, +3 for HIGH severity
 * - +2 when the note has a reflection
 * - +1 when the note carries `gameState` or `state`
 *
 * @param {Array<object>} notes - Candidate notes
 * @param {number} maxNotes - Maximum number of notes to return
 * @param {Array<object>} keyEvents - Notes considered key events (matched by identity)
 * @returns {Array<object>} Up to `maxNotes` notes, highest score first
 */
function selectByImportance(notes, maxNotes, keyEvents) {
  // Set lookup avoids rescanning keyEvents for every note
  const keyEventSet = new Set(keyEvents);
  // One reference time for the whole batch so every note is scored consistently
  const now = Date.now();
  // Score notes by importance
  const scored = notes.map(note => {
    let score = 0;
    // Key events get high score
    if (keyEventSet.has(note)) score += 10;
    // Recent notes get higher score. The bonus is clamped to [0, 10] so a
    // timestamp in the future cannot outweigh every other signal, and a
    // non-numeric timestamp cannot poison the score with NaN.
    const ageSeconds = (now - (note.timestamp || 0)) / 1000;
    const recencyBonus = Math.min(10, Math.max(0, 10 - ageSeconds));
    if (Number.isFinite(recencyBonus)) score += recencyBonus;
    // Critical severity gets high score
    if (note.severity === 'CRITICAL') score += 5;
    if (note.severity === 'HIGH') score += 3;
    // Reflections get higher score
    if (note.reflection) score += 2;
    // State changes get higher score
    if (note.gameState || note.state) score += 1;
    return { note, score };
  });
  // Sort by score and select top
  return scored
    .sort((a, b) => b.score - a.score)
    .slice(0, maxNotes)
    .map(item => item.note);
}
/**
 * Generate summary from selected notes
 *
 * The summary lists how many notes were kept out of the total, followed by
 * counts of bug detections, reflections and critical issues (when non-zero)
 * and the time span covered by the selected notes (when at least one second).
 *
 * @param {Array<object>} selectedNotes - Notes kept after compression
 * @param {Array<object>} allNotes - All notes before compression
 * @returns {string} Comma-separated summary text
 */
function generateSummary(selectedNotes, allNotes) {
  if (selectedNotes.length === 0) {
    return 'No notes available';
  }
  const parts = [];
  parts.push(`Summary: ${selectedNotes.length} key observations from ${allNotes.length} total notes.`);
  // Key statistics
  const bugs = selectedNotes.filter(n => n.step?.includes('bug')).length;
  const reflections = selectedNotes.filter(n => n.reflection).length;
  const critical = selectedNotes.filter(n => n.severity === 'CRITICAL').length;
  if (bugs > 0) parts.push(`${bugs} bug detection(s)`);
  if (reflections > 0) parts.push(`${reflections} reflection(s)`);
  if (critical > 0) parts.push(`${critical} critical issue(s)`);
  // Time span: computed from the earliest and latest timestamps actually
  // present, so it does not depend on the order of selectedNotes and a note
  // without a timestamp is not mistaken for one recorded at time 0.
  let earliest = Infinity;
  let latest = -Infinity;
  let timestamped = 0;
  for (const note of selectedNotes) {
    const { timestamp } = note;
    if (typeof timestamp !== 'number' || !Number.isFinite(timestamp)) continue;
    timestamped += 1;
    if (timestamp < earliest) earliest = timestamp;
    if (timestamp > latest) latest = timestamp;
  }
  if (timestamped > 1) {
    const span = Math.round((latest - earliest) / 1000);
    if (span > 0) parts.push(`Time span: ${span}s`);
  }
  return parts.join(', ');
}
/**
 * Estimate token count for notes
 *
 * Uses the rough heuristic of 1 token per 4 characters over each note's step,
 * observation and serialized state, plus the optional summary text.
 *
 * @param {Array<object>} notes - Notes to measure
 * @param {string} [summary=''] - Summary text to include in the estimate
 * @returns {number} Estimated token count (0 when there is nothing to measure)
 */
function estimateTokens(notes, summary = '') {
  // State objects are caller-supplied; a circular or otherwise unserializable
  // state should degrade the estimate, not abort the whole compression.
  const serializeState = state => {
    try {
      return JSON.stringify(state);
    } catch {
      return String(state);
    }
  };
  // Rough estimate: 1 token ≈ 4 characters
  const noteText = notes.map(n =>
    `${n.step || ''} ${n.observation || ''} ${serializeState(n.gameState || n.state || {})}`
  ).join(' ');
  // Join only the non-empty pieces so no dangling separator is counted and an
  // empty input estimates to 0 tokens.
  const totalText = [noteText, summary].filter(Boolean).join(' ');
  return Math.ceil(totalText.length / 4);
}
/**
 * Compress state history by keeping important transitions
 *
 * Candidates are chosen in priority order — first state, last state, then
 * states that differ significantly from their predecessor — deduplicated by
 * their JSON serialization and capped at the limit. The survivors are then
 * returned in their original chronological order.
 *
 * @param {Array<Record<string, unknown>>} stateHistory - Array of state objects
 *   (a single non-array state is treated as a one-element history)
 * @param {{
 *   maxStates?: number;
 *   includeFirst?: boolean;
 *   includeLast?: boolean;
 *   includeKeyTransitions?: boolean;
 *   maxLength?: number;
 *   preserveImportant?: boolean;
 * }} [options={}] - Compression options. `maxLength` and `preserveImportant`
 *   are the previously documented names; they act as aliases for `maxStates`
 *   and `includeKeyTransitions` when those are not given.
 * @returns {{
 *   compressed: Array<Record<string, unknown>>;
 *   summary: string;
 *   tokenEstimate: number;
 *   compressionRatio: number;
 *   originalCount: number;
 *   compressedCount: number;
 *   originalTokenEstimate: number;
 * }} Selected states plus a textual summary and size statistics
 */
export function compressStateHistory(stateHistory, options = {}) {
  const {
    maxLength,
    preserveImportant,
    maxStates = maxLength ?? 3, // Maximum states to include
    includeFirst = true, // Always include first state
    includeLast = true, // Always include last state
    includeKeyTransitions = preserveImportant ?? true // Include states with significant changes
  } = options;
  if (!stateHistory || stateHistory.length === 0) {
    // Same shape as the non-empty result so callers can read every field unconditionally
    return {
      compressed: [],
      summary: 'No state history',
      tokenEstimate: 0,
      compressionRatio: 1.0,
      originalCount: 0,
      compressedCount: 0,
      originalTokenEstimate: 0
    };
  }
  const states = Array.isArray(stateHistory) ? stateHistory : [stateHistory];
  // Negative, fractional or non-numeric limits would otherwise produce surprising slices
  const stateLimit = Math.max(0, Math.floor(Number(maxStates) || 0));
  // Select key states, highest priority first
  const candidates = [];
  if (includeFirst && states.length > 0) {
    candidates.push(states[0]);
  }
  if (includeLast && states.length > 1 && states[states.length - 1] !== states[0]) {
    candidates.push(states[states.length - 1]);
  }
  // Find key transitions (significant changes)
  if (includeKeyTransitions && states.length > 2) {
    candidates.push(...findKeyTransitions(states));
  }
  // Remember where each state first appears so the result can be put back in order
  const firstIndex = new Map();
  states.forEach((state, index) => {
    if (!firstIndex.has(state)) firstIndex.set(state, index);
  });
  // Deduplicate and limit (by priority), then restore chronological order so
  // the output still reads as a history: first, transitions, last.
  const seen = new Set();
  const unique = candidates
    .filter(state => {
      const id = JSON.stringify(state);
      if (seen.has(id)) return false;
      seen.add(id);
      return true;
    })
    .slice(0, stateLimit)
    .sort((a, b) => (firstIndex.get(a) ?? 0) - (firstIndex.get(b) ?? 0));
  // Generate summary
  const summary = generateStateSummary(unique, states);
  // Estimate tokens
  const tokenEstimate = estimateStateTokens(unique, summary);
  const originalTokenEstimate = estimateStateTokens(states);
  const compressionRatio = originalTokenEstimate > 0 ? tokenEstimate / originalTokenEstimate : 1.0;
  return {
    compressed: unique,
    summary,
    tokenEstimate,
    compressionRatio,
    originalCount: states.length,
    compressedCount: unique.length,
    originalTokenEstimate
  };
}
/**
 * Find key transitions (states with significant changes)
 *
 * A state is a key transition when, compared with the state before it, any
 * key (from either state) changed significantly:
 * - two numbers: the absolute difference exceeds `numericThreshold`
 * - two objects/arrays: their JSON serializations differ
 * - anything else: the values are not strictly equal (this also covers keys
 *   that were added or removed)
 *
 * @param {Array<Record<string, unknown>>} states - State history in chronological order
 * @param {number} [numericThreshold=10] - Minimum numeric delta that counts as significant
 * @returns {Array<Record<string, unknown>>} The states (same object references) that follow a significant change
 */
function findKeyTransitions(states, numericThreshold = 10) {
  const isRecord = value => value !== null && typeof value === 'object';
  const transitions = [];
  for (let i = 1; i < states.length; i++) {
    const prev = states[i - 1];
    const curr = states[i];
    // Null or primitive entries have no keys to compare; fall back to identity
    if (!isRecord(prev) || !isRecord(curr)) {
      if (prev !== curr) transitions.push(curr);
      continue;
    }
    // Check for significant changes (general-purpose, not game-specific).
    // Keys of both states are inspected so a removed key counts as a change.
    const keys = new Set([...Object.keys(prev), ...Object.keys(curr)]);
    const hasSignificantChange = [...keys].some(key =>
      isSignificantValueChange(prev[key], curr[key], numericThreshold)
    );
    if (hasSignificantChange) {
      transitions.push(curr);
    }
  }
  return transitions;
}
/**
 * Decide whether one value changed significantly between two states.
 */
function isSignificantValueChange(before, after, numericThreshold) {
  // Numeric changes
  if (typeof before === 'number' && typeof after === 'number') {
    return Math.abs(after - before) > numericThreshold;
  }
  if (before === after) return false;
  const bothStructured =
    before !== null && after !== null &&
    typeof before === 'object' && typeof after === 'object';
  // String/boolean/type changes
  if (!bothStructured) return true;
  // Nested values are usually rebuilt for every snapshot, so compare content
  // rather than references; unserializable values are treated as changed.
  try {
    return JSON.stringify(before) !== JSON.stringify(after);
  } catch {
    return true;
  }
}
/**
 * Generate summary for state history
 *
 * Reports how many states were kept out of the total and, when more than one
 * state was kept, whether the first and last kept states differ on any key.
 *
 * @param {Array<Record<string, unknown>>} selectedStates - States kept after compression
 * @param {Array<Record<string, unknown>>} allStates - All states before compression
 * @returns {string} Comma-separated summary text
 */
function generateStateSummary(selectedStates, allStates) {
  if (selectedStates.length === 0) {
    return 'No state history';
  }
  const parts = [];
  parts.push(`${selectedStates.length} key states from ${allStates.length} total`);
  if (selectedStates.length > 1) {
    // A null/undefined entry is compared as if it were an empty state
    const first = selectedStates[0] ?? {};
    const last = selectedStates[selectedStates.length - 1] ?? {};
    // Check for any changes (general-purpose). Keys of both states are
    // inspected so a key that disappeared by the last state is detected too.
    const keys = new Set([...Object.keys(first), ...Object.keys(last)]);
    const hasChanges = [...keys].some(key => first[key] !== last[key]);
    if (hasChanges) parts.push('state changes detected');
  }
  return parts.join(', ');
}
/**
 * Estimate tokens for state history
 *
 * Uses the rough heuristic of 1 token per 4 characters over the JSON
 * serialization of every state, plus the optional summary text.
 *
 * @param {Array<unknown>} states - States to measure
 * @param {string} [summary=''] - Summary text to include in the estimate
 * @returns {number} Estimated token count (0 when there is nothing to measure)
 */
function estimateStateTokens(states, summary = '') {
  const stateText = states.map(state => {
    // An unserializable state should degrade the estimate, not throw
    try {
      return JSON.stringify(state);
    } catch {
      return String(state);
    }
  }).join(' ');
  // Join only the non-empty pieces so no dangling separator is counted and an
  // empty history estimates to 0 tokens.
  const totalText = [stateText, summary].filter(Boolean).join(' ');
  return Math.ceil(totalText.length / 4);
}