@arclabs561/ai-visual-test 0.5.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/CHANGELOG.md +102 -11
  2. package/DEPLOYMENT.md +225 -9
  3. package/README.md +71 -80
  4. package/index.d.ts +862 -3
  5. package/package.json +10 -51
  6. package/src/batch-optimizer.mjs +39 -0
  7. package/src/cache.mjs +241 -16
  8. package/src/config.mjs +33 -91
  9. package/src/constants.mjs +54 -0
  10. package/src/convenience.mjs +113 -10
  11. package/src/cost-optimization.mjs +1 -0
  12. package/src/cost-tracker.mjs +134 -2
  13. package/src/data-extractor.mjs +36 -7
  14. package/src/dynamic-few-shot.mjs +69 -11
  15. package/src/errors.mjs +6 -2
  16. package/src/experience-propagation.mjs +12 -0
  17. package/src/experience-tracer.mjs +12 -3
  18. package/src/game-player.mjs +222 -43
  19. package/src/graceful-shutdown.mjs +126 -0
  20. package/src/helpers/playwright.mjs +22 -8
  21. package/src/human-validation-manager.mjs +99 -2
  22. package/src/index.mjs +48 -3
  23. package/src/integrations/playwright.mjs +140 -0
  24. package/src/judge.mjs +697 -24
  25. package/src/load-env.mjs +2 -1
  26. package/src/logger.mjs +31 -3
  27. package/src/model-tier-selector.mjs +1 -221
  28. package/src/natural-language-specs.mjs +31 -3
  29. package/src/persona-enhanced.mjs +4 -2
  30. package/src/persona-experience.mjs +1 -1
  31. package/src/pricing.mjs +28 -0
  32. package/src/prompt-composer.mjs +162 -5
  33. package/src/provider-data.mjs +115 -0
  34. package/src/render-change-detector.mjs +5 -0
  35. package/src/research-enhanced-validation.mjs +7 -5
  36. package/src/retry.mjs +21 -7
  37. package/src/rubrics.mjs +4 -0
  38. package/src/safe-logger.mjs +71 -0
  39. package/src/session-cost-tracker.mjs +320 -0
  40. package/src/smart-validator.mjs +8 -8
  41. package/src/spec-templates.mjs +52 -6
  42. package/src/startup-validation.mjs +127 -0
  43. package/src/temporal-adaptive.mjs +2 -2
  44. package/src/temporal-decision-manager.mjs +1 -271
  45. package/src/temporal-logic.mjs +104 -0
  46. package/src/temporal-note-pruner.mjs +119 -0
  47. package/src/temporal-preprocessor.mjs +1 -543
  48. package/src/temporal.mjs +681 -79
  49. package/src/utils/action-hallucination-detector.mjs +301 -0
  50. package/src/utils/baseline-validator.mjs +82 -0
  51. package/src/utils/cache-stats.mjs +104 -0
  52. package/src/utils/cached-llm.mjs +164 -0
  53. package/src/utils/capability-stratifier.mjs +108 -0
  54. package/src/utils/counterfactual-tester.mjs +83 -0
  55. package/src/utils/error-recovery.mjs +117 -0
  56. package/src/utils/explainability-scorer.mjs +119 -0
  57. package/src/utils/exploratory-automation.mjs +131 -0
  58. package/src/utils/index.mjs +10 -0
  59. package/src/utils/intent-recognizer.mjs +201 -0
  60. package/src/utils/log-sanitizer.mjs +165 -0
  61. package/src/utils/path-validator.mjs +88 -0
  62. package/src/utils/performance-logger.mjs +316 -0
  63. package/src/utils/performance-measurement.mjs +280 -0
  64. package/src/utils/prompt-sanitizer.mjs +213 -0
  65. package/src/utils/rate-limiter.mjs +144 -0
  66. package/src/validation-framework.mjs +24 -20
  67. package/src/validation-result-normalizer.mjs +27 -1
  68. package/src/validation.mjs +75 -25
  69. package/src/validators/accessibility-validator.mjs +144 -0
  70. package/src/validators/hybrid-validator.mjs +48 -4
  71. package/api/health.js +0 -34
  72. package/api/validate.js +0 -252
  73. package/public/index.html +0 -149
  74. package/vercel.json +0 -27
package/index.d.ts CHANGED
@@ -210,26 +210,126 @@ export interface EnsembleResult {
210
210
  votingMethod: string;
211
211
  }
212
212
 
213
+ /**
214
+ * Ensemble Judge
215
+ *
216
+ * Uses multiple LLM providers to evaluate screenshots and aggregates results
217
+ * for improved accuracy (10-20% improvement with 3+ models).
218
+ *
219
+ * **Research:** Based on arXiv:2510.01499 - "Optimal LLM Aggregation"
220
+ *
221
+ * **Use when:** You need maximum reliability for critical evaluations
222
+ * (accessibility, quality checks, design validation).
223
+ *
224
+ * @example
225
+ * ```typescript
226
+ * const judge = new EnsembleJudge({
227
+ * judges: [
228
+ * new VLLMJudge({ provider: 'gemini' }),
229
+ * new VLLMJudge({ provider: 'openai' }),
230
+ * new VLLMJudge({ provider: 'claude' })
231
+ * ],
232
+ * votingMethod: 'weighted_average'
233
+ * });
234
+ *
235
+ * const result = await judge.evaluate(
236
+ * 'screenshot.png',
237
+ * 'Evaluate accessibility'
238
+ * );
239
+ *
240
+ * console.log(result.score); // Aggregated score
241
+ * console.log(result.agreement.score); // How much models agree
242
+ * ```
243
+ */
213
244
  export class EnsembleJudge {
245
+ /**
246
+ * Create a new Ensemble Judge instance.
247
+ *
248
+ * @param options - Ensemble options (judges, voting method, weights, etc.)
249
+ */
214
250
  constructor(options?: EnsembleJudgeOptions);
251
+
252
+ /**
253
+ * Evaluate screenshot using multiple judges and aggregate results.
254
+ *
255
+ * @param imagePath - Path to screenshot
256
+ * @param prompt - Evaluation prompt
257
+ * @param context - Optional validation context
258
+ * @returns Promise resolving to EnsembleResult with aggregated score and agreement metrics
259
+ */
215
260
  evaluate(imagePath: string, prompt: string, context?: Record<string, unknown>): Promise<EnsembleResult>;
216
261
  }
217
262
 
218
263
  export function createEnsembleJudge(providers?: string[], options?: EnsembleJudgeOptions): EnsembleJudge;
219
264
 
220
265
  // Core Types
266
+ /**
267
+ * Validation context for screenshot validation.
268
+ *
269
+ * Provides additional context to guide the AI evaluation, including test type,
270
+ * viewport information, game state, and optimization options.
271
+ *
272
+ * @example
273
+ * ```typescript
274
+ * const context: ValidationContext = {
275
+ * testType: 'accessibility',
276
+ * viewport: { width: 1920, height: 1080 },
277
+ * autoSelectTier: true,
278
+ * autoSelectProvider: true
279
+ * };
280
+ * ```
281
+ */
221
282
  export interface ValidationContext {
283
+ /** Test type identifier (e.g., 'accessibility', 'payment-screen', 'gameplay') */
222
284
  testType?: string;
285
+ /** Viewport dimensions for context-aware evaluation */
223
286
  viewport?: { width: number; height: number };
287
+ /** Game state or application state for context */
224
288
  gameState?: Record<string, unknown>;
289
+ /** Enable caching (default: true) */
225
290
  useCache?: boolean;
291
+ /** Request timeout in milliseconds */
226
292
  timeout?: number;
293
+ /** Use explicit rubric for consistent scoring */
227
294
  useRubric?: boolean;
295
+ /** Include dimension scores in evaluation */
228
296
  includeDimensions?: boolean;
297
+ /** URL of the page being tested */
229
298
  url?: string;
299
+ /** Description of the test scenario */
230
300
  description?: string;
301
+ /** Current step in multi-step test */
231
302
  step?: string;
303
+ /** Custom prompt builder function */
232
304
  promptBuilder?: (prompt: string, context: ValidationContext) => string;
305
+ /** Auto-select model tier (fast/balanced/best) based on context */
306
+ autoSelectTier?: boolean;
307
+ /** Auto-select provider (cheapest available) */
308
+ autoSelectProvider?: boolean;
309
+ /** Include cost comparison in results */
310
+ includeCostComparison?: boolean;
311
+ /** Frequency for high-frequency validation (Hz) */
312
+ frequency?: number;
313
+ /** Cost sensitivity flag for optimization */
314
+ costSensitive?: boolean;
315
+ /** Criticality level (low/medium/high/critical) */
316
+ criticality?: 'low' | 'medium' | 'high' | 'critical';
317
+ /** Model tier to use (fast/balanced/best) */
318
+ modelTier?: 'fast' | 'balanced' | 'best';
319
+ /** Temporal decision options (for high-frequency validation) */
320
+ useTemporalDecision?: boolean;
321
+ /** Temporal notes for decision context */
322
+ temporalNotes?: TemporalNote[];
323
+ /** Current state for temporal decision */
324
+ currentState?: Record<string, unknown>;
325
+ /** Previous state for temporal decision */
326
+ previousState?: Record<string, unknown>;
327
+ /** Previous result for temporal decision */
328
+ previousResult?: ValidationResult;
329
+ /** Temporal decision manager options */
330
+ temporalDecisionOptions?: Record<string, unknown>;
331
+ /** Per-call visual anchors (appended to config-level anchors) */
332
+ anchors?: VisualAnchors | null;
233
333
  }
234
334
 
235
335
  export interface EstimatedCost {
@@ -250,25 +350,121 @@ export interface SemanticInfo {
250
350
  zeroToleranceViolations?: string[];
251
351
  }
252
352
 
353
+ /**
354
+ * Result of screenshot validation.
355
+ *
356
+ * Contains the AI's evaluation of the screenshot, including score, issues,
357
+ * reasoning, and metadata about the validation process.
358
+ *
359
+ * @example
360
+ * ```typescript
361
+ * const result: ValidationResult = {
362
+ * enabled: true,
363
+ * provider: 'gemini',
364
+ * score: 8.5,
365
+ * issues: ['Low contrast on submit button'],
366
+ * assessment: 'Good',
367
+ * reasoning: 'The form is mostly accessible...',
368
+ * estimatedCost: { totalCost: '0.000123', currency: 'USD' },
369
+ * responseTime: 1234,
370
+ * cached: false
371
+ * };
372
+ * ```
373
+ */
253
374
  export interface ValidationResult {
375
+ /** Whether validation was enabled (false if API key missing) */
254
376
  enabled: boolean;
377
+ /** LLM provider used (gemini, openai, claude, groq) */
255
378
  provider: string;
379
+ /** Quality score (0-10, null if validation failed) */
256
380
  score: number | null;
381
+ /** List of issues found */
257
382
  issues: string[];
383
+ /** Overall assessment (e.g., 'Good', 'Needs Improvement') */
258
384
  assessment: string | null;
385
+ /** Detailed reasoning for the score */
259
386
  reasoning: string;
387
+ /** Estimated API cost breakdown */
260
388
  estimatedCost?: EstimatedCost | null;
389
+ /** Response time in milliseconds */
261
390
  responseTime: number;
391
+ /** Whether result was served from cache */
262
392
  cached?: boolean;
393
+ /** Raw judgment text from LLM */
263
394
  judgment?: string;
395
+ /** Raw API response */
264
396
  raw?: unknown;
397
+ /** Extracted semantic information */
265
398
  semantic?: SemanticInfo;
399
+ /** Error message if validation failed */
266
400
  error?: string;
401
+ /** Status message */
267
402
  message?: string;
403
+ /** Provider pricing information */
268
404
  pricing?: { input: number; output: number };
405
+ /** Timestamp of validation */
269
406
  timestamp?: string;
407
+ /** Test name if provided */
270
408
  testName?: string;
409
+ /** Viewport dimensions if provided */
271
410
  viewport?: { width: number; height: number } | null;
411
+ /** Cost comparison information (if includeCostComparison enabled) */
412
+ costComparison?: {
413
+ current: { tier: string; provider: string; cost: number };
414
+ tiers: Record<string, number>;
415
+ savings: Record<string, { absolute: number; percent: number; cost: number }>;
416
+ recommendation: { tier: string; cost: number; savings: number; savingsPercent: number; reason: string };
417
+ };
418
+ /** Whether temporal decision skipped this call */
419
+ skipped?: boolean;
420
+ /** Reason for skipping (if skipped) */
421
+ skipReason?: string;
422
+ /** Urgency level (if temporal decision used) */
423
+ urgency?: 'low' | 'medium' | 'high';
424
+ }
425
+
426
+ /**
427
+ * A single visual anchor: either a plain text string or an object
428
+ * with optional dimension scoping and/or an image reference.
429
+ *
430
+ * Plain string: `"Card images large enough to see art"`
431
+ * With dimension: `{ text: "Card images large", dimension: "card_presentation" }`
432
+ * Image ref: `{ image: "/path/to/good.png", label: "Well-themed Magic layout" }`
433
+ * Image + dimension: `{ image: "/path/to/good.png", label: "...", dimension: "game_authenticity" }`
434
+ *
435
+ * Images accept a file path or a data URI (`data:image/png;base64,...`).
436
+ */
437
+ export type AnchorEntry = string | {
438
+ /** Text description of the anchor signal */
439
+ text?: string;
440
+ /** File path or data URI of a reference screenshot */
441
+ image?: string;
442
+ /** Short label for the image (shown in prompt) */
443
+ label?: string;
444
+ /** Rubric dimension this anchor relates to (e.g., "game_authenticity") */
445
+ dimension?: string;
446
+ };
447
+
448
+ /**
449
+ * Domain-level visual anchors for VLM evaluation grounding.
450
+ *
451
+ * Text anchors describe what to look for / flag in words.
452
+ * Image anchors provide reference screenshots as few-shot visual examples
453
+ * so the VLM can calibrate against concrete good/bad instances.
454
+ *
455
+ * Anchors can optionally be scoped to rubric dimensions via the
456
+ * `dimension` field on AnchorEntry objects.
457
+ *
458
+ * Set once in config for the project; per-call anchors in
459
+ * ValidationContext append to (not replace) config-level anchors.
460
+ */
461
+ export interface VisualAnchors {
462
+ /** Brief domain description injected as context (e.g., "Card game search UI for TCG players") */
463
+ domain?: string;
464
+ /** Positive signals the VLM should look for (text and/or image entries) */
465
+ positive?: AnchorEntry[];
466
+ /** Negative signals the VLM should flag (text and/or image entries) */
467
+ negative?: AnchorEntry[];
272
468
  }
273
469
 
274
470
  export interface ConfigOptions {
@@ -280,6 +476,8 @@ export interface ConfigOptions {
280
476
  maxConcurrency?: number;
281
477
  timeout?: number;
282
478
  verbose?: boolean;
479
+ /** Domain-level visual anchors included in every evaluation prompt */
480
+ anchors?: VisualAnchors | null;
283
481
  }
284
482
 
285
483
  export interface Config {
@@ -294,6 +492,8 @@ export interface Config {
294
492
  priority: number;
295
493
  };
296
494
  enabled: boolean;
495
+ /** Normalized visual anchors (null when none configured) */
496
+ anchors: VisualAnchors | null;
297
497
  cache: {
298
498
  enabled: boolean;
299
499
  dir: string | null;
@@ -307,28 +507,183 @@ export interface Config {
307
507
  };
308
508
  }
309
509
 
310
- // VLLMJudge Class
510
+ /**
511
+ * VLLM Judge Class
512
+ *
513
+ * Core screenshot validation engine using Vision Language Models.
514
+ * Supports multiple providers (Gemini, OpenAI, Claude, Groq) with automatic
515
+ * selection, caching, and cost optimization.
516
+ *
517
+ * **Use when:** You need fine-grained control over validation or custom judge implementations.
518
+ * **Otherwise:** Use `validateScreenshot()` function for simpler API.
519
+ *
520
+ * @example
521
+ * ```typescript
522
+ * // Create custom judge instance
523
+ * const judge = new VLLMJudge({
524
+ * provider: 'gemini',
525
+ * apiKey: process.env.GEMINI_API_KEY,
526
+ * cacheEnabled: true
527
+ * });
528
+ *
529
+ * const result = await judge.judgeScreenshot(
530
+ * 'screenshot.png',
531
+ * 'Evaluate this page'
532
+ * );
533
+ * ```
534
+ */
311
535
  export class VLLMJudge {
536
+ /**
537
+ * Create a new VLLM Judge instance.
538
+ *
539
+ * @param options - Configuration options (provider, API key, cache, etc.)
540
+ */
312
541
  constructor(options?: ConfigOptions);
542
+
543
+ /** Current provider name (gemini, openai, claude, groq) */
313
544
  provider: string;
545
+ /** API key for current provider */
314
546
  apiKey: string | null;
547
+ /** Provider configuration (model, pricing, etc.) */
315
548
  providerConfig: Config['providerConfig'];
549
+ /** Whether validation is enabled (false if API key missing) */
316
550
  enabled: boolean;
317
551
 
552
+ /**
553
+ * Convert image file to base64 string for API.
554
+ *
555
+ * @param imagePath - Path to image file
556
+ * @returns Base64-encoded image string
557
+ * @throws {FileError} If file not found or invalid format
558
+ */
318
559
  imageToBase64(imagePath: string): string;
560
+
561
+ /**
562
+ * Build evaluation prompt with context.
563
+ *
564
+ * @param prompt - Base evaluation prompt
565
+ * @param context - Validation context
566
+ * @returns Enhanced prompt with context
567
+ */
319
568
  buildPrompt(prompt: string, context: ValidationContext): string;
569
+
570
+ /**
571
+ * Extract semantic information from judgment text.
572
+ *
573
+ * @param judgment - Judgment text or object
574
+ * @returns Structured semantic information
575
+ */
320
576
  extractSemanticInfo(judgment: string | object): SemanticInfo;
577
+
578
+ /**
579
+ * Estimate API cost for validation.
580
+ *
581
+ * @param data - API request/response data
582
+ * @param provider - Provider name
583
+ * @returns Estimated cost breakdown or null
584
+ */
321
585
  estimateCost(data: unknown, provider: string): EstimatedCost | null;
322
- judgeScreenshot(imagePath: string, prompt: string, context?: ValidationContext): Promise<ValidationResult>;
586
+
587
+ /**
588
+ * Judge a screenshot using VLLM.
589
+ *
590
+ * @param imagePath - Path to screenshot or array for comparison
591
+ * @param prompt - Evaluation prompt
592
+ * @param context - Optional validation context
593
+ * @returns Promise resolving to ValidationResult
594
+ */
595
+ judgeScreenshot(imagePath: string | string[], prompt: string, context?: ValidationContext): Promise<ValidationResult>;
323
596
  }
324
597
 
325
598
  // Core Functions
599
+ /**
600
+ * Validate a screenshot using Vision Language Models (VLLM).
601
+ *
602
+ * This is the primary API function. It takes a screenshot and evaluation prompt,
603
+ * sends it to an AI model (Gemini, OpenAI, Claude, or Groq), and returns structured
604
+ * validation results with score, issues, and reasoning.
605
+ *
606
+ * **Key Features:**
607
+ * - Automatic provider selection (cheapest available)
608
+ * - Automatic tier selection (fast/balanced/best)
609
+ * - Built-in caching (7-day TTL)
610
+ * - Cost optimization
611
+ * - Temporal decision making (for high-frequency validation)
612
+ *
613
+ * @param imagePath - Path to screenshot file (PNG, JPEG, GIF, WebP) or array of paths for comparison
614
+ * @param prompt - Evaluation prompt (e.g., "Is this accessible?", "Check if payment form works")
615
+ * @param context - Optional validation context (testType, viewport, optimization options)
616
+ * @returns Promise resolving to ValidationResult with score, issues, reasoning, and metadata
617
+ *
618
+ * @example
619
+ * ```typescript
620
+ * // Basic usage
621
+ * const result = await validateScreenshot(
622
+ * 'screenshot.png',
623
+ * 'Check if this payment form is accessible'
624
+ * );
625
+ * console.log(result.score); // 8.5 (0-10 scale)
626
+ * console.log(result.issues); // ['Low contrast on button', 'Missing label']
627
+ * console.log(result.reasoning); // "The form is mostly accessible..."
628
+ * ```
629
+ *
630
+ * @example
631
+ * ```typescript
632
+ * // With cost optimization
633
+ * const result = await validateScreenshot(
634
+ * 'screenshot.png',
635
+ * 'Evaluate accessibility',
636
+ * {
637
+ * autoSelectTier: true,
638
+ * autoSelectProvider: true,
639
+ * includeCostComparison: true
640
+ * }
641
+ * );
642
+ * console.log(result.costComparison?.savings.fast?.percent); // 45% savings
643
+ * ```
644
+ *
645
+ * @example
646
+ * ```typescript
647
+ * // High-frequency validation (60Hz)
648
+ * const result = await validateScreenshot(
649
+ * 'frame.png',
650
+ * 'Is the game playable?',
651
+ * {
652
+ * frequency: 60,
653
+ * autoSelectTier: true,
654
+ * useTemporalDecision: true
655
+ * }
656
+ * );
657
+ * ```
658
+ *
659
+ * @throws {FileError} If screenshot file not found or invalid format
660
+ * @throws {ValidationError} If validation fails
661
+ * @throws {ProviderError} If API provider error occurs
662
+ * @throws {TimeoutError} If request times out
663
+ */
326
664
  export function validateScreenshot(
327
- imagePath: string,
665
+ imagePath: string | string[],
328
666
  prompt: string,
329
667
  context?: ValidationContext
330
668
  ): Promise<ValidationResult>;
331
669
 
670
+ /**
671
+ * Extract semantic information from VLLM judgment text.
672
+ *
673
+ * Parses AI judgment responses into structured data (score, issues, reasoning).
674
+ * Useful for custom implementations that need to parse judgment text.
675
+ *
676
+ * @param judgment - Judgment text or object from VLLM
677
+ * @returns Structured semantic information with score, issues, assessment, reasoning
678
+ *
679
+ * @example
680
+ * ```typescript
681
+ * const judgment = "Score: 8.5. Issues: Low contrast. Reasoning: The form is mostly accessible...";
682
+ * const info = extractSemanticInfo(judgment);
683
+ * console.log(info.score); // 8.5
684
+ * console.log(info.issues); // ['Low contrast']
685
+ * ```
686
+ */
332
687
  export function extractSemanticInfo(judgment: string | object): SemanticInfo;
333
688
 
334
689
  // Multi-Modal Types
@@ -405,11 +760,33 @@ export function multiModalValidation(
405
760
  }>;
406
761
 
407
762
  // Temporal Types
763
+ /**
764
+ * Temporal note for tracking state over time.
765
+ *
766
+ * Used in high-frequency validation (10-60Hz) to track observations
767
+ * and enable temporal decision making (reduces LLM calls by 98.5%).
768
+ *
769
+ * @example
770
+ * ```typescript
771
+ * const note: TemporalNote = {
772
+ * timestamp: Date.now(),
773
+ * elapsed: 100,
774
+ * score: 8.5,
775
+ * observation: 'Button clicked',
776
+ * step: 'checkout'
777
+ * };
778
+ * ```
779
+ */
408
780
  export interface TemporalNote {
781
+ /** Timestamp in milliseconds */
409
782
  timestamp?: number;
783
+ /** Elapsed time since start in milliseconds */
410
784
  elapsed?: number;
785
+ /** Quality score (0-10) */
411
786
  score?: number;
787
+ /** Observation description */
412
788
  observation?: string;
789
+ /** Step identifier */
413
790
  step?: string;
414
791
  }
415
792
 
@@ -437,6 +814,36 @@ export interface AggregatedTemporalNotes {
437
814
  }
438
815
 
439
816
  // Temporal Functions
817
+ /**
818
+ * Aggregate temporal notes into time windows with weighted scores.
819
+ *
820
+ * Used for high-frequency validation to reduce LLM calls by aggregating
821
+ * observations over time windows. Implements exponential decay weighting
822
+ * (recent notes weighted more heavily).
823
+ *
824
+ * **Research:** Inspired by arXiv:2505.17663 (DynToM) and arXiv:2507.15851
825
+ * (Human Temporal Cognition), adapted with exponential decay for practical use.
826
+ *
827
+ * @param notes - Array of temporal notes to aggregate
828
+ * @param options - Aggregation options
829
+ * @param options.windowSize - Time window size in milliseconds (default: 1000)
830
+ * @param options.decayFactor - Exponential decay factor (default: 0.9)
831
+ * @param options.coherenceThreshold - Coherence threshold for filtering (default: 0.5)
832
+ * @returns Aggregated notes with windows, summary, and coherence score
833
+ *
834
+ * @example
835
+ * ```typescript
836
+ * const notes: TemporalNote[] = [
837
+ * { timestamp: 0, score: 8, observation: 'Initial state' },
838
+ * { timestamp: 100, score: 8.5, observation: 'Button clicked' },
839
+ * { timestamp: 200, score: 9, observation: 'Form submitted' }
840
+ * ];
841
+ *
842
+ * const aggregated = aggregateTemporalNotes(notes);
843
+ * console.log(aggregated.coherence); // 0.92 (high coherence)
844
+ * console.log(aggregated.windows[0].avgScore); // 8.5
845
+ * ```
846
+ */
440
847
  export function aggregateTemporalNotes(
441
848
  notes: TemporalNote[],
442
849
  options?: {
@@ -450,6 +857,223 @@ export function formatNotesForPrompt(aggregated: AggregatedTemporalNotes): strin
450
857
 
451
858
  export function calculateCoherence(windows: TemporalWindow[]): number;
452
859
 
860
+ /**
861
+ * Temporal Decision Manager
862
+ *
863
+ * Decides when to call LLM vs. reuse previous result for high-frequency validation.
864
+ * Reduces LLM calls by 98.5% while maintaining accuracy through temporal coherence.
865
+ *
866
+ * **Research:** Based on arXiv:2406.12125 - "Efficient Sequential Decision Making with Large Language Models"
867
+ *
868
+ * **Core Insight:** Don't prompt on every state change, prompt when decision is needed.
869
+ *
870
+ * **Note:** Implementation is obfuscated to protect proprietary algorithms, but API is fully documented.
871
+ *
872
+ * @example
873
+ * ```typescript
874
+ * const manager = new TemporalDecisionManager({
875
+ * minNotesForPrompt: 3,
876
+ * coherenceThreshold: 0.5,
877
+ * urgencyThreshold: 0.3
878
+ * });
879
+ *
880
+ * const decision = await manager.shouldPrompt(
881
+ * currentState,
882
+ * previousState,
883
+ * temporalNotes,
884
+ * context
885
+ * );
886
+ *
887
+ * if (decision.shouldPrompt) {
888
+ * // Call LLM
889
+ * } else {
890
+ * // Reuse previous result
891
+ * }
892
+ * ```
893
+ */
894
+ export class TemporalDecisionManager {
895
+ /**
896
+ * Create a new Temporal Decision Manager.
897
+ *
898
+ * @param options - Decision manager options
899
+ * @param options.minNotesForPrompt - Minimum notes before prompting (default: 3)
900
+ * @param options.coherenceThreshold - Coherence threshold for prompting (default: 0.5)
901
+ * @param options.urgencyThreshold - Urgency threshold for prompting (default: 0.3)
902
+ * @param options.maxWaitTime - Maximum wait time before forcing prompt (default: 10000ms)
903
+ * @param options.stateChangeThreshold - State change threshold for prompting (default: 0.2)
904
+ * @param options.warmStartSteps - Use LLM for first N steps (default: 10)
905
+ * @param options.adaptiveSampling - Enable adaptive sampling (default: true)
906
+ */
907
+ constructor(options?: {
908
+ minNotesForPrompt?: number;
909
+ coherenceThreshold?: number;
910
+ urgencyThreshold?: number;
911
+ maxWaitTime?: number;
912
+ stateChangeThreshold?: number;
913
+ warmStartSteps?: number;
914
+ adaptiveSampling?: boolean;
915
+ });
916
+
917
+ /**
918
+ * Decide if we should prompt now or wait for more context.
919
+ *
920
+ * @param currentState - Current state object
921
+ * @param previousState - Previous state object (if any)
922
+ * @param temporalNotes - Array of temporal notes
923
+ * @param context - Additional context
924
+ * @returns Decision object with shouldPrompt, reason, and urgency
925
+ */
926
+ shouldPrompt(
927
+ currentState: Record<string, unknown>,
928
+ previousState: Record<string, unknown> | null,
929
+ temporalNotes: TemporalNote[],
930
+ context?: Record<string, unknown>
931
+ ): Promise<{
932
+ shouldPrompt: boolean;
933
+ reason: string;
934
+ urgency: 'low' | 'medium' | 'high';
935
+ }>;
936
+
937
+ /**
938
+ * Calculate state change magnitude.
939
+ *
940
+ * @param currentState - Current state
941
+ * @param previousState - Previous state
942
+ * @returns State change score (0-1)
943
+ */
944
+ calculateStateChange(
945
+ currentState: Record<string, unknown>,
946
+ previousState: Record<string, unknown> | null
947
+ ): number;
948
+
949
+ /**
950
+ * Check if current state is a decision point.
951
+ *
952
+ * @param currentState - Current state
953
+ * @param context - Additional context
954
+ * @returns True if decision point
955
+ */
956
+ isDecisionPoint(
957
+ currentState: Record<string, unknown>,
958
+ context?: Record<string, unknown>
959
+ ): boolean;
960
+
961
+ /**
962
+ * Check if there's a recent user action.
963
+ *
964
+ * @param temporalNotes - Array of temporal notes
965
+ * @param context - Additional context
966
+ * @returns True if recent user action detected
967
+ */
968
+ hasRecentUserAction(
969
+ temporalNotes: TemporalNote[],
970
+ context?: Record<string, unknown>
971
+ ): boolean;
972
+ }
973
+
974
+ /**
975
+ * Create a temporal decision manager with default options.
976
+ *
977
+ * @param options - Decision manager options
978
+ * @returns New TemporalDecisionManager instance
979
+ */
980
+ export function createTemporalDecisionManager(options?: {
981
+ minNotesForPrompt?: number;
982
+ coherenceThreshold?: number;
983
+ urgencyThreshold?: number;
984
+ maxWaitTime?: number;
985
+ stateChangeThreshold?: number;
986
+ warmStartSteps?: number;
987
+ adaptiveSampling?: boolean;
988
+ }): TemporalDecisionManager;
989
+
990
+ /**
991
+ * Temporal Preprocessing Manager
992
+ *
993
+ * Optimizes temporal note processing for high-frequency validation (10-60Hz).
994
+ * Implements activity-based preprocessing patterns to reduce computational overhead.
995
+ *
996
+ * **Note:** Implementation is obfuscated to protect proprietary algorithms, but API is fully documented.
997
+ *
998
+ * @example
999
+ * ```typescript
1000
+ * const manager = new TemporalPreprocessingManager({
1001
+ * activityThreshold: 0.5,
1002
+ * highFrequencyMode: true
1003
+ * });
1004
+ *
1005
+ * const processed = await manager.preprocess(temporalNotes, context);
1006
+ * ```
1007
+ */
1008
+ export class TemporalPreprocessingManager {
1009
+ /**
1010
+ * Create a new Temporal Preprocessing Manager.
1011
+ *
1012
+ * @param options - Preprocessing options
1013
+ */
1014
+ constructor(options?: Record<string, unknown>);
1015
+
1016
+ /**
1017
+ * Preprocess temporal notes for efficient handling.
1018
+ *
1019
+ * @param notes - Array of temporal notes
1020
+ * @param context - Additional context
1021
+ * @returns Processed notes
1022
+ */
1023
+ preprocess(
1024
+ notes: TemporalNote[],
1025
+ context?: Record<string, unknown>
1026
+ ): Promise<TemporalNote[]>;
1027
+ }
1028
+
1029
+ /**
1030
+ * Adaptive Temporal Processor
1031
+ *
1032
+ * Adaptively processes temporal notes based on activity patterns.
1033
+ *
1034
+ * @example
1035
+ * ```typescript
1036
+ * const processor = new AdaptiveTemporalProcessor();
1037
+ * const processed = await processor.process(notes, context);
1038
+ * ```
1039
+ */
1040
+ export class AdaptiveTemporalProcessor {
1041
+ /**
1042
+ * Create a new Adaptive Temporal Processor.
1043
+ *
1044
+ * @param options - Processor options
1045
+ */
1046
+ constructor(options?: Record<string, unknown>);
1047
+
1048
+ /**
1049
+ * Process temporal notes adaptively.
1050
+ *
1051
+ * @param notes - Array of temporal notes
1052
+ * @param context - Additional context
1053
+ * @returns Processed notes
1054
+ */
1055
+ process(
1056
+ notes: TemporalNote[],
1057
+ context?: Record<string, unknown>
1058
+ ): Promise<TemporalNote[]>;
1059
+ }
1060
+
1061
+ /**
1062
+ * Create a temporal preprocessing manager with default options.
1063
+ *
1064
+ * @param options - Preprocessing options
1065
+ * @returns New TemporalPreprocessingManager instance
1066
+ */
1067
+ export function createTemporalPreprocessingManager(options?: Record<string, unknown>): TemporalPreprocessingManager;
1068
+
1069
+ /**
1070
+ * Create an adaptive temporal processor with default options.
1071
+ *
1072
+ * @param options - Processor options
1073
+ * @returns New AdaptiveTemporalProcessor instance
1074
+ */
1075
+ export function createAdaptiveTemporalProcessor(options?: Record<string, unknown>): AdaptiveTemporalProcessor;
1076
+
453
1077
  // Cache Types
454
1078
  export interface CacheStats {
455
1079
  hits: number;
@@ -459,16 +1083,77 @@ export interface CacheStats {
459
1083
  }
460
1084
 
461
1085
  // Cache Functions
1086
+ /**
1087
+ * Initialize cache system.
1088
+ *
1089
+ * Sets up file-based caching with 7-day TTL. Cache persists across
1090
+ * process restarts and reduces API costs by serving cached results.
1091
+ *
1092
+ * @param cacheDir - Cache directory path (default: `.cache/ai-visual-test`)
1093
+ *
1094
+ * @example
1095
+ * ```typescript
1096
+ * initCache('/tmp/my-cache');
1097
+ * const result = await validateScreenshot('screenshot.png', 'Evaluate');
1098
+ * // Subsequent calls with same screenshot/prompt use cache
1099
+ * ```
1100
+ */
462
1101
  export function initCache(cacheDir?: string): void;
1102
+
1103
+ /**
1104
+ * Generate cache key for validation request.
1105
+ *
1106
+ * Creates a SHA-256 hash of the image path, prompt, and context for cache lookup.
1107
+ *
1108
+ * @param imagePath - Screenshot path
1109
+ * @param prompt - Evaluation prompt
1110
+ * @param context - Validation context
1111
+ * @returns Cache key string
1112
+ */
463
1113
  export function generateCacheKey(imagePath: string, prompt: string, context?: ValidationContext): string;
1114
+
1115
+ /**
1116
+ * Get cached validation result.
1117
+ *
1118
+ * @param imagePath - Screenshot path
1119
+ * @param prompt - Evaluation prompt
1120
+ * @param context - Validation context
1121
+ * @returns Cached ValidationResult or null if not cached
1122
+ */
464
1123
  export function getCached(imagePath: string, prompt: string, context?: ValidationContext): ValidationResult | null;
1124
+
1125
+ /**
1126
+ * Cache validation result.
1127
+ *
1128
+ * @param imagePath - Screenshot path
1129
+ * @param prompt - Evaluation prompt
1130
+ * @param context - Validation context
1131
+ * @param result - Validation result to cache
1132
+ */
465
1133
  export function setCached(
466
1134
  imagePath: string,
467
1135
  prompt: string,
468
1136
  context: ValidationContext,
469
1137
  result: ValidationResult
470
1138
  ): void;
1139
+
1140
+ /**
1141
+ * Clear all cached results.
1142
+ */
471
1143
  export function clearCache(): void;
1144
+
1145
+ /**
1146
+ * Get cache statistics.
1147
+ *
1148
+ * @returns Cache stats (hits, misses, size, hit rate)
1149
+ *
1150
+ * @example
1151
+ * ```typescript
1152
+ * const stats = getCacheStats();
1153
+ * console.log(`Hit rate: ${stats.hitRate * 100}%`); // 85%
1154
+ * console.log(`Cache size: ${stats.size}`); // 123
1155
+ * ```
1156
+ */
472
1157
  export function getCacheStats(): CacheStats;
473
1158
 
474
1159
  // Config Functions
@@ -507,10 +1192,58 @@ export class ScoreTracker {
507
1192
  }
508
1193
 
509
1194
  // BatchOptimizer Class
1195
+ /**
1196
+ * Batch Optimizer
1197
+ *
1198
+ * Optimizes validation of multiple screenshots by batching requests,
1199
+ * managing concurrency, and caching results.
1200
+ *
1201
+ * **Use when:** You need to validate multiple screenshots efficiently.
1202
+ *
1203
+ * @example
1204
+ * ```typescript
1205
+ * const optimizer = new BatchOptimizer({
1206
+ * maxConcurrency: 5,
1207
+ * batchSize: 10,
1208
+ * cacheEnabled: true
1209
+ * });
1210
+ *
1211
+ * const results = await optimizer.batchValidate(
1212
+ * ['screenshot1.png', 'screenshot2.png', 'screenshot3.png'],
1213
+ * 'Evaluate accessibility'
1214
+ * );
1215
+ *
1216
+ * console.log(results.length); // 3
1217
+ * ```
1218
+ */
510
1219
  export class BatchOptimizer {
1220
+ /**
1221
+ * Create a new Batch Optimizer instance.
1222
+ *
1223
+ * @param options - Optimizer options (maxConcurrency, batchSize, cacheEnabled)
1224
+ */
511
1225
  constructor(options?: { maxConcurrency?: number; batchSize?: number; cacheEnabled?: boolean });
1226
+
1227
+ /**
1228
+ * Validate multiple screenshots in batch.
1229
+ *
1230
+ * @param imagePaths - Single screenshot path or array of paths (the declared type is `string | string[]`; an array of arrays for comparison is not accepted by this signature)
1231
+ * @param prompt - Evaluation prompt
1232
+ * @param context - Optional validation context
1233
+ * @returns Promise resolving to array of ValidationResults
1234
+ */
512
1235
  batchValidate(imagePaths: string | string[], prompt: string, context?: ValidationContext): Promise<ValidationResult[]>;
1236
+
1237
+ /**
1238
+ * Clear batch optimizer cache.
1239
+ */
513
1240
  clearCache(): void;
1241
+
1242
+ /**
1243
+ * Get cache statistics.
1244
+ *
1245
+ * @returns Cache stats (size, queue length, active requests)
1246
+ */
514
1247
  getCacheStats(): { cacheSize: number; queueLength: number; activeRequests: number };
515
1248
  }
516
1249
 
@@ -775,18 +1508,76 @@ export interface StateValidationResult<T = unknown> extends ValidationResult {
775
1508
  matches: boolean;
776
1509
  }
777
1510
 
1511
+ /**
1512
+ * State Validator
1513
+ *
1514
+ * Validates that visual state matches expected state using VLLM extraction.
1515
+ * Extracts state from screenshot and compares with expected state.
1516
+ *
1517
+ * **Use when:** You need to verify specific state values (cart count, button text, etc.)
1518
+ *
1519
+ * @example
1520
+ * ```typescript
1521
+ * const validator = new StateValidator();
1522
+ *
1523
+ * const result = await validator.validateState(
1524
+ * 'checkout.png',
1525
+ * {
1526
+ * cartCount: 1,
1527
+ * buttonText: 'Checkout'
1528
+ * },
1529
+ * {
1530
+ * testType: 'cart-state'
1531
+ * }
1532
+ * );
1533
+ *
1534
+ * console.log(result.matches); // true/false
1535
+ * console.log(result.discrepancies); // ['cartCount: expected 1, got 2']
1536
+ * ```
1537
+ */
778
1538
  export class StateValidator<T = unknown> {
1539
+ /**
1540
+ * Create a new State Validator instance.
1541
+ *
1542
+ * @param options - Validator options (tolerance, state extractor, etc.)
1543
+ */
779
1544
  constructor(options?: StateValidatorOptions<T>);
1545
+
1546
+ /**
1547
+ * Validate state (static method).
1548
+ *
1549
+ * @param screenshotPath - Path to a screenshot, or an array of screenshot paths for comparison
1550
+ * @param expectedState - Expected state object
1551
+ * @param options - Validation options
1552
+ * @returns Promise resolving to StateValidationResult
1553
+ */
780
1554
  static validate<T = unknown>(
781
1555
  screenshotPath: string | string[],
782
1556
  expectedState: T,
783
1557
  options?: StateValidationOptions<T>
784
1558
  ): Promise<StateValidationResult<T>>;
1559
+
1560
+ /**
1561
+ * Validate state matches expected state.
1562
+ *
1563
+ * @param screenshotPath - Path to a screenshot, or an array of screenshot paths for comparison
1564
+ * @param expectedState - Expected state object
1565
+ * @param options - Validation options
1566
+ * @returns Promise resolving to StateValidationResult
1567
+ */
785
1568
  validateState(
786
1569
  screenshotPath: string | string[],
787
1570
  expectedState: T,
788
1571
  options?: StateValidationOptions<T>
789
1572
  ): Promise<StateValidationResult<T>>;
1573
+
1574
+ /**
1575
+ * Build state validation prompt.
1576
+ *
1577
+ * @param expectedState - Expected state object
1578
+ * @param options - Validation options
1579
+ * @returns Validation prompt string
1580
+ */
790
1581
  buildStatePrompt(expectedState: T, options?: StateValidationOptions<T>): string;
791
1582
  }
792
1583
 
@@ -820,22 +1611,90 @@ export interface AccessibilityResult extends ValidationResult {
820
1611
  standards: string[];
821
1612
  }
822
1613
 
1614
+ /**
1615
+ * Accessibility Validator
1616
+ *
1617
+ * Validates accessibility using VLLM semantic evaluation.
1618
+ * Checks contrast, labels, keyboard navigation, error messages, and WCAG compliance.
1619
+ *
1620
+ * **Use when:** You need comprehensive accessibility validation beyond programmatic checks.
1621
+ *
1622
+ * @example
1623
+ * ```typescript
1624
+ * const validator = new AccessibilityValidator({
1625
+ * minContrast: 4.5,
1626
+ * standards: ['WCAG-AA']
1627
+ * });
1628
+ *
1629
+ * const result = await validator.validateAccessibility(
1630
+ * 'payment-form.png',
1631
+ * {
1632
+ * testType: 'accessibility'
1633
+ * }
1634
+ * );
1635
+ *
1636
+ * console.log(result.passes); // true/false
1637
+ * console.log(result.violations.zeroTolerance); // Critical violations
1638
+ * ```
1639
+ */
823
1640
  export class AccessibilityValidator {
1641
+ /**
1642
+ * Create a new Accessibility Validator instance.
1643
+ *
1644
+ * @param options - Validator options (minContrast, standards, etc.)
1645
+ */
824
1646
  constructor(options?: AccessibilityValidatorOptions);
1647
+
1648
+ /**
1649
+ * Validate accessibility (static method).
1650
+ *
1651
+ * @param screenshotPath - Path to a screenshot, or an array of screenshot paths for comparison
1652
+ * @param options - Validation options
1653
+ * @returns Promise resolving to AccessibilityResult
1654
+ */
825
1655
  static validate(
826
1656
  screenshotPath: string | string[],
827
1657
  options?: AccessibilityOptions
828
1658
  ): Promise<AccessibilityResult>;
1659
+
1660
+ /**
1661
+ * Validate accessibility of screenshot.
1662
+ *
1663
+ * @param screenshotPath - Path to a screenshot, or an array of screenshot paths for comparison
1664
+ * @param options - Validation options
1665
+ * @returns Promise resolving to AccessibilityResult
1666
+ */
829
1667
  validateAccessibility(
830
1668
  screenshotPath: string | string[],
831
1669
  options?: AccessibilityOptions
832
1670
  ): Promise<AccessibilityResult>;
1671
+
1672
+ /**
1673
+ * Build accessibility validation prompt.
1674
+ *
1675
+ * @param options - Validation options
1676
+ * @returns Validation prompt string
1677
+ */
833
1678
  buildAccessibilityPrompt(options?: AccessibilityOptions): string;
1679
+
1680
+ /**
1681
+ * Detect accessibility violations from validation result.
1682
+ *
1683
+ * @param result - Validation result
1684
+ * @returns Categorized violations (zeroTolerance, critical, warnings)
1685
+ */
834
1686
  detectViolations(result: ValidationResult): {
835
1687
  zeroTolerance: string[];
836
1688
  critical: string[];
837
1689
  warnings: string[];
838
1690
  };
1691
+
1692
+ /**
1693
+ * Extract contrast information from validation result.
1694
+ *
1695
+ * @param result - Validation result
1696
+ * @returns Contrast ratios and compliance status
1697
+ */
839
1698
  extractContrastInfo(result: ValidationResult): {
840
1699
  ratios: string[];
841
1700
  minRatio: number | null;