@arclabs561/ai-visual-test 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/.secretsignore.example +20 -0
  2. package/CHANGELOG.md +360 -0
  3. package/CONTRIBUTING.md +63 -0
  4. package/DEPLOYMENT.md +80 -0
  5. package/LICENSE +22 -0
  6. package/README.md +142 -0
  7. package/SECURITY.md +108 -0
  8. package/api/health.js +34 -0
  9. package/api/validate.js +252 -0
  10. package/index.d.ts +1221 -0
  11. package/package.json +112 -0
  12. package/public/index.html +149 -0
  13. package/src/batch-optimizer.mjs +451 -0
  14. package/src/bias-detector.mjs +370 -0
  15. package/src/bias-mitigation.mjs +233 -0
  16. package/src/cache.mjs +433 -0
  17. package/src/config.mjs +268 -0
  18. package/src/constants.mjs +80 -0
  19. package/src/context-compressor.mjs +350 -0
  20. package/src/convenience.mjs +617 -0
  21. package/src/cost-tracker.mjs +257 -0
  22. package/src/cross-modal-consistency.mjs +170 -0
  23. package/src/data-extractor.mjs +232 -0
  24. package/src/dynamic-few-shot.mjs +140 -0
  25. package/src/dynamic-prompts.mjs +361 -0
  26. package/src/ensemble/index.mjs +53 -0
  27. package/src/ensemble-judge.mjs +366 -0
  28. package/src/error-handler.mjs +67 -0
  29. package/src/errors.mjs +167 -0
  30. package/src/experience-propagation.mjs +128 -0
  31. package/src/experience-tracer.mjs +487 -0
  32. package/src/explanation-manager.mjs +299 -0
  33. package/src/feedback-aggregator.mjs +248 -0
  34. package/src/game-goal-prompts.mjs +478 -0
  35. package/src/game-player.mjs +548 -0
  36. package/src/hallucination-detector.mjs +155 -0
  37. package/src/helpers/playwright.mjs +80 -0
  38. package/src/human-validation-manager.mjs +516 -0
  39. package/src/index.mjs +364 -0
  40. package/src/judge.mjs +929 -0
  41. package/src/latency-aware-batch-optimizer.mjs +192 -0
  42. package/src/load-env.mjs +159 -0
  43. package/src/logger.mjs +55 -0
  44. package/src/metrics.mjs +187 -0
  45. package/src/model-tier-selector.mjs +221 -0
  46. package/src/multi-modal/index.mjs +36 -0
  47. package/src/multi-modal-fusion.mjs +190 -0
  48. package/src/multi-modal.mjs +524 -0
  49. package/src/natural-language-specs.mjs +1071 -0
  50. package/src/pair-comparison.mjs +277 -0
  51. package/src/persona/index.mjs +42 -0
  52. package/src/persona-enhanced.mjs +200 -0
  53. package/src/persona-experience.mjs +572 -0
  54. package/src/position-counterbalance.mjs +140 -0
  55. package/src/prompt-composer.mjs +375 -0
  56. package/src/render-change-detector.mjs +583 -0
  57. package/src/research-enhanced-validation.mjs +436 -0
  58. package/src/retry.mjs +152 -0
  59. package/src/rubrics.mjs +231 -0
  60. package/src/score-tracker.mjs +277 -0
  61. package/src/smart-validator.mjs +447 -0
  62. package/src/spec-config.mjs +106 -0
  63. package/src/spec-templates.mjs +347 -0
  64. package/src/specs/index.mjs +38 -0
  65. package/src/temporal/index.mjs +102 -0
  66. package/src/temporal-adaptive.mjs +163 -0
  67. package/src/temporal-batch-optimizer.mjs +222 -0
  68. package/src/temporal-constants.mjs +69 -0
  69. package/src/temporal-context.mjs +49 -0
  70. package/src/temporal-decision-manager.mjs +271 -0
  71. package/src/temporal-decision.mjs +669 -0
  72. package/src/temporal-errors.mjs +58 -0
  73. package/src/temporal-note-pruner.mjs +173 -0
  74. package/src/temporal-preprocessor.mjs +543 -0
  75. package/src/temporal-prompt-formatter.mjs +219 -0
  76. package/src/temporal-validation.mjs +159 -0
  77. package/src/temporal.mjs +415 -0
  78. package/src/type-guards.mjs +311 -0
  79. package/src/uncertainty-reducer.mjs +470 -0
  80. package/src/utils/index.mjs +175 -0
  81. package/src/validation-framework.mjs +321 -0
  82. package/src/validation-result-normalizer.mjs +64 -0
  83. package/src/validation.mjs +243 -0
  84. package/src/validators/accessibility-programmatic.mjs +345 -0
  85. package/src/validators/accessibility-validator.mjs +223 -0
  86. package/src/validators/batch-validator.mjs +143 -0
  87. package/src/validators/hybrid-validator.mjs +268 -0
  88. package/src/validators/index.mjs +34 -0
  89. package/src/validators/prompt-builder.mjs +218 -0
  90. package/src/validators/rubric.mjs +85 -0
  91. package/src/validators/state-programmatic.mjs +260 -0
  92. package/src/validators/state-validator.mjs +291 -0
  93. package/vercel.json +27 -0
package/index.d.ts ADDED
@@ -0,0 +1,1221 @@
1
+ /**
2
+ * TypeScript definitions for ai-visual-test
3
+ *
4
+ * Provides type safety and IntelliSense support for the package.
5
+ */
6
+
7
+ // Utility Types
8
/**
 * Relaxes the properties named by `K` to optional while every other
 * property of `T` keeps its original modifier.
 *
 * @template T - Source object type.
 * @template K - Keys of `T` that become optional.
 */
export type PartialBy<T, K extends keyof T> =
  Partial<Pick<T, K>> & Omit<T, K>;
14
+
15
/**
 * Forces the properties named by `K` to be required; all other properties
 * of `T` are carried over as declared.
 *
 * @template T - Source object type.
 * @template K - Keys of `T` that become required.
 */
export type RequiredBy<T, K extends keyof T> =
  Required<Pick<T, K>> & T;
21
+
22
/**
 * Resolves to the type a function returns, or `never` when `T` does not
 * match a function signature.
 *
 * NOTE: this export shares its name with TypeScript's built-in `ReturnType`
 * utility; importing it shadows the global in the importing module.
 *
 * @template T - Function type to inspect.
 */
export type ReturnType<T extends (...args: any[]) => any> =
  T extends (...args: any[]) => infer Result ? Result : never;
27
+
28
/**
 * Resolves to the tuple of parameter types accepted by a function, or
 * `never` when `T` does not match a function signature.
 *
 * NOTE: this export shares its name with TypeScript's built-in `Parameters`
 * utility; importing it shadows the global in the importing module.
 *
 * @template T - Function type to inspect.
 */
export type Parameters<T extends (...args: any[]) => any> =
  T extends (...args: infer Args) => any ? Args : never;
33
+
34
/**
 * Deep partial - recursively makes every nested property optional.
 *
 * Function types are returned unchanged. They also satisfy `extends object`,
 * and mapping over a function type drops its call signature, which would
 * leave callback members uncallable. Non-object types are returned as-is.
 *
 * @template T - Type to relax.
 */
export type DeepPartial<T> = T extends (...args: any[]) => any
  ? T
  : T extends object
    ? { [P in keyof T]?: DeepPartial<T[P]> }
    : T;
39
+
40
/**
 * Deep required - recursively makes every nested property required.
 *
 * Function types are returned unchanged. They also satisfy `extends object`,
 * and mapping over a function type drops its call signature, which would
 * leave callback members uncallable. Non-object types are returned as-is.
 *
 * @template T - Type to tighten.
 */
export type DeepRequired<T> = T extends (...args: any[]) => any
  ? T
  : T extends object
    ? { [P in keyof T]-?: DeepRequired<T[P]> }
    : T;
45
+
46
/**
 * Strips `null` and `undefined` from `T`.
 *
 * NOTE: this export shares its name with TypeScript's built-in `NonNullable`
 * utility; importing it shadows the global in the importing module.
 *
 * @template T - Type to filter.
 */
export type NonNullable<T> = T extends undefined | null ? never : T;
51
+
52
/**
 * Call signature for validation functions.
 *
 * A validator receives an image path and a prompt, plus an optional
 * `ValidationContext`, and resolves asynchronously to `T`.
 *
 * @template T - Resolved result type; defaults to `ValidationResult`.
 */
export type ValidationFunction<T = ValidationResult> = (
  imagePath: string,
  prompt: string,
  context?: ValidationContext
) => Promise<T>;
61
+
62
+ // Error Types
63
/**
 * Base error class declared by this package; the other error classes in
 * this file extend it.
 */
export class AIBrowserTestError extends Error {
  /** Error code supplied to the constructor. */
  code: string;

  /** Structured details attached to the error. */
  details: Record<string, unknown>;

  /**
   * @param message - Error message.
   * @param code - Error code.
   * @param details - Optional structured details.
   */
  constructor(message: string, code: string, details?: Record<string, unknown>);

  /** Returns a plain-object view of the error; `stack` may be absent. */
  toJSON(): {
    name: string;
    code: string;
    message: string;
    details: Record<string, unknown>;
    stack?: string;
  };
}
75
+
76
+ export class ValidationError extends AIBrowserTestError {
77
+ constructor(message: string, details?: Record<string, unknown>);
78
+ }
79
+
80
+ export class CacheError extends AIBrowserTestError {
81
+ constructor(message: string, details?: Record<string, unknown>);
82
+ }
83
+
84
+ export class ConfigError extends AIBrowserTestError {
85
+ constructor(message: string, details?: Record<string, unknown>);
86
+ }
87
+
88
+ export class ProviderError extends AIBrowserTestError {
89
+ provider: string;
90
+ constructor(message: string, provider: string, details?: Record<string, unknown>);
91
+ }
92
+
93
+ export class TimeoutError extends AIBrowserTestError {
94
+ timeout: number;
95
+ constructor(message: string, timeout: number, details?: Record<string, unknown>);
96
+ }
97
+
98
+ export class FileError extends AIBrowserTestError {
99
+ filePath: string;
100
+ constructor(message: string, filePath: string, details?: Record<string, unknown>);
101
+ }
102
+
103
+ export class StateMismatchError extends ValidationError {
104
+ discrepancies: string[];
105
+ extracted: unknown;
106
+ expected: unknown;
107
+ constructor(discrepancies: string[], extracted: unknown, expected: unknown, message?: string);
108
+ }
109
+
110
+ export function isAIBrowserTestError(error: unknown): error is AIBrowserTestError;
111
+ export function isErrorType<T extends AIBrowserTestError>(error: unknown, errorClass: new (...args: any[]) => T): error is T;
112
+
113
+ // Rubrics
114
/**
 * Scoring rubric: a mandatory `score` section and an optional set of named
 * `dimensions`.
 */
export interface Rubric {
  /** Overall score section: a description plus a string-keyed criteria map. */
  score: {
    description: string;
    criteria: Record<string, string>;
  };

  /** Optional dimensions keyed by name, each with a description and a criteria list. */
  dimensions?: Record<string, { description: string; criteria: string[] }>;
}
124
+
125
+ export const DEFAULT_RUBRIC: Rubric;
126
+ export function buildRubricPrompt(rubric?: Rubric | null, includeDimensions?: boolean): string;
127
+ export function getRubricForTestType(testType: string): Rubric;
128
+
129
+ // Bias Detection
130
/** Result shape returned by `detectBias`. */
export interface BiasDetectionResult {
  /** Overall flag for the judgment. */
  hasBias: boolean;

  /** One entry per reported bias type. */
  biases: {
    type: string;
    detected: boolean;
    score: number;
    evidence: Record<string, unknown>;
  }[];

  /** Severity bucket. */
  severity: 'none' | 'low' | 'medium' | 'high';

  /** Recommendation strings. */
  recommendations: string[];
}
141
+
142
+ export function detectBias(judgment: string | object, options?: {
143
+ checkVerbosity?: boolean;
144
+ checkLength?: boolean;
145
+ checkFormatting?: boolean;
146
+ checkPosition?: boolean;
147
+ checkAuthority?: boolean;
148
+ }): BiasDetectionResult;
149
+
150
/**
 * Result shape returned by `detectPositionBias`. Only `detected` is always
 * present; every other member is optional.
 */
export interface PositionBiasResult {
  detected: boolean;
  firstBias?: boolean;
  lastBias?: boolean;
  reason?: string;

  /** Score figures backing the result, when provided. */
  evidence?: {
    firstScore: number;
    lastScore: number;
    avgMiddle: number;
    allScores: number[];
  };
}
162
+
163
+ export function detectPositionBias(judgments: Array<{ score: number | null }>): PositionBiasResult;
164
+
165
+ // Ensemble Judging
166
+ export interface EnsembleJudgeOptions {
167
+ judges?: Array<VLLMJudge>;
168
+ votingMethod?: 'weighted_average' | 'majority' | 'consensus';
169
+ weights?: number[];
170
+ minAgreement?: number;
171
+ enableBiasDetection?: boolean;
172
+ }
173
+
174
+ export interface EnsembleResult {
175
+ score: number | null;
176
+ assessment: string;
177
+ issues: string[];
178
+ reasoning: string;
179
+ confidence: number;
180
+ agreement: {
181
+ score: number;
182
+ scoreAgreement: number;
183
+ assessmentAgreement: number;
184
+ mean: number;
185
+ stdDev: number;
186
+ scores: number[];
187
+ };
188
+ disagreement: {
189
+ hasDisagreement: boolean;
190
+ scoreRange: number;
191
+ assessmentDisagreement: boolean;
192
+ uniqueAssessments: string[];
193
+ maxScore: number;
194
+ minScore: number;
195
+ };
196
+ biasDetection?: {
197
+ individual: BiasDetectionResult[];
198
+ position: PositionBiasResult;
199
+ };
200
+ individualJudgments: Array<{
201
+ judgeIndex: number;
202
+ score: number | null;
203
+ assessment: string | null;
204
+ issues: string[];
205
+ reasoning: string | null;
206
+ provider: string;
207
+ error?: string;
208
+ }>;
209
+ judgeCount: number;
210
+ votingMethod: string;
211
+ }
212
+
213
+ export class EnsembleJudge {
214
+ constructor(options?: EnsembleJudgeOptions);
215
+ evaluate(imagePath: string, prompt: string, context?: Record<string, unknown>): Promise<EnsembleResult>;
216
+ }
217
+
218
+ export function createEnsembleJudge(providers?: string[], options?: EnsembleJudgeOptions): EnsembleJudge;
219
+
220
+ // Core Types
221
/**
 * Optional context accepted by the validation functions in this file.
 * Every member is optional.
 */
export interface ValidationContext {
  testType?: string;
  viewport?: { width: number; height: number };
  gameState?: Record<string, unknown>;
  useCache?: boolean;
  timeout?: number;
  useRubric?: boolean;
  includeDimensions?: boolean;
  url?: string;
  description?: string;
  step?: string;

  /** Custom prompt builder; receives the prompt and this context, returns a string. */
  promptBuilder?: (prompt: string, context: ValidationContext) => string;
}
234
+
235
/**
 * Cost estimate attached to a validation result. Token counts are numbers;
 * the cost figures and the currency are typed as strings.
 */
export interface EstimatedCost {
  inputTokens: number;
  outputTokens: number;
  inputCost: string;
  outputCost: string;
  totalCost: string;
  currency: string;
}
243
+
244
/**
 * Structured information extracted from a judgment. `score` and
 * `assessment` may be `null`; the two violation lists are optional.
 */
export interface SemanticInfo {
  score: null | number;
  issues: string[];
  assessment: null | string;
  reasoning: string;
  brutalistViolations?: string[];
  zeroToleranceViolations?: string[];
}
252
+
253
+ export interface ValidationResult {
254
+ enabled: boolean;
255
+ provider: string;
256
+ score: number | null;
257
+ issues: string[];
258
+ assessment: string | null;
259
+ reasoning: string;
260
+ estimatedCost?: EstimatedCost | null;
261
+ responseTime: number;
262
+ cached?: boolean;
263
+ judgment?: string;
264
+ raw?: unknown;
265
+ semantic?: SemanticInfo;
266
+ error?: string;
267
+ message?: string;
268
+ pricing?: { input: number; output: number };
269
+ timestamp?: string;
270
+ testName?: string;
271
+ viewport?: { width: number; height: number } | null;
272
+ }
273
+
274
+ export interface ConfigOptions {
275
+ provider?: 'gemini' | 'openai' | 'claude' | null;
276
+ apiKey?: string | null;
277
+ env?: NodeJS.ProcessEnv;
278
+ cacheDir?: string | null;
279
+ cacheEnabled?: boolean;
280
+ maxConcurrency?: number;
281
+ timeout?: number;
282
+ verbose?: boolean;
283
+ }
284
+
285
/**
 * Resolved configuration, as returned by `createConfig` / `getConfig`.
 * Unlike `ConfigOptions`, every member here is required.
 */
export interface Config {
  provider: string;
  apiKey: string | null;

  /** Settings for the selected provider. */
  providerConfig: {
    name: string;
    apiUrl: string;
    model: string;
    freeTier: boolean;
    pricing: { input: number; output: number };
    priority: number;
  };

  enabled: boolean;
  cache: { enabled: boolean; dir: string | null };
  performance: { maxConcurrency: number; timeout: number };
  debug: { verbose: boolean };
}
309
+
310
+ // VLLMJudge Class
311
+ export class VLLMJudge {
312
+ constructor(options?: ConfigOptions);
313
+ provider: string;
314
+ apiKey: string | null;
315
+ providerConfig: Config['providerConfig'];
316
+ enabled: boolean;
317
+
318
+ imageToBase64(imagePath: string): string;
319
+ buildPrompt(prompt: string, context: ValidationContext): string;
320
+ extractSemanticInfo(judgment: string | object): SemanticInfo;
321
+ estimateCost(data: unknown, provider: string): EstimatedCost | null;
322
+ judgeScreenshot(imagePath: string, prompt: string, context?: ValidationContext): Promise<ValidationResult>;
323
+ }
324
+
325
+ // Core Functions
326
+ export function validateScreenshot(
327
+ imagePath: string,
328
+ prompt: string,
329
+ context?: ValidationContext
330
+ ): Promise<ValidationResult>;
331
+
332
+ export function extractSemanticInfo(judgment: string | object): SemanticInfo;
333
+
334
+ // Multi-Modal Types
335
+ export interface RenderedCode {
336
+ html: string;
337
+ criticalCSS: Record<string, Record<string, string>>;
338
+ domStructure: {
339
+ prideParade?: {
340
+ computedTop: string;
341
+ flagRowCount: number;
342
+ };
343
+ footer?: {
344
+ computedBottom: string;
345
+ hasStripe: boolean;
346
+ };
347
+ paymentCode?: {
348
+ visible: boolean;
349
+ };
350
+ };
351
+ }
352
+
353
+ export interface TemporalScreenshot {
354
+ path: string;
355
+ timestamp: number;
356
+ elapsed: number;
357
+ }
358
+
359
/** A persona: a name, a perspective string and a list of focus strings. */
export interface Persona {
  name: string;
  perspective: string;
  focus: Array<string>;
}
364
+
365
+ export interface PerspectiveEvaluation {
366
+ persona: Persona;
367
+ evaluation: ValidationResult;
368
+ }
369
+
370
+ // Multi-Modal Functions
371
+ export function extractRenderedCode(page: any): Promise<RenderedCode>;
372
+ export function captureTemporalScreenshots(
373
+ page: any,
374
+ fps?: number,
375
+ duration?: number
376
+ ): Promise<TemporalScreenshot[]>;
377
+ export function multiPerspectiveEvaluation(
378
+ validateFn: ValidationFunction,
379
+ screenshotPath: string,
380
+ renderedCode: RenderedCode,
381
+ gameState?: Record<string, unknown>,
382
+ personas?: Persona[] | null
383
+ ): Promise<PerspectiveEvaluation[]>;
384
+ export function multiModalValidation(
385
+ validateFn: ValidationFunction,
386
+ page: any,
387
+ testName: string,
388
+ options?: {
389
+ fps?: number;
390
+ duration?: number;
391
+ captureCode?: boolean;
392
+ captureState?: boolean;
393
+ multiPerspective?: boolean;
394
+ }
395
+ ): Promise<{
396
+ screenshotPath: string;
397
+ renderedCode: RenderedCode | null;
398
+ gameState: Record<string, unknown>;
399
+ temporalScreenshots: TemporalScreenshot[];
400
+ perspectives: PerspectiveEvaluation[];
401
+ codeValidation: Record<string, boolean>;
402
+ aggregatedScore: number | null;
403
+ aggregatedIssues: string[];
404
+ timestamp: number;
405
+ }>;
406
+
407
+ // Temporal Types
408
/**
 * A single note in a temporal sequence. Every member is optional.
 * NOTE(review): the units of `timestamp` and `elapsed` are not stated in
 * these declarations - confirm against the implementation.
 */
export interface TemporalNote {
  timestamp?: number;
  elapsed?: number;
  score?: number;
  observation?: string;
  step?: string;
}
415
+
416
+ export interface TemporalWindow {
417
+ index: number;
418
+ startTime: number;
419
+ endTime: number;
420
+ notes: TemporalNote[];
421
+ weightedScore: number;
422
+ totalWeight: number;
423
+ avgScore: number;
424
+ observations: Set<string>;
425
+ }
426
+
427
+ export interface AggregatedTemporalNotes {
428
+ windows: TemporalWindow[];
429
+ summary: string;
430
+ coherence: number;
431
+ conflicts: Array<{
432
+ window1: number;
433
+ window2: number;
434
+ type: string;
435
+ description: string;
436
+ }>;
437
+ }
438
+
439
+ // Temporal Functions
440
+ export function aggregateTemporalNotes(
441
+ notes: TemporalNote[],
442
+ options?: {
443
+ windowSize?: number;
444
+ decayFactor?: number;
445
+ coherenceThreshold?: number;
446
+ }
447
+ ): AggregatedTemporalNotes;
448
+
449
+ export function formatNotesForPrompt(aggregated: AggregatedTemporalNotes): string;
450
+
451
+ export function calculateCoherence(windows: TemporalWindow[]): number;
452
+
453
+ // Cache Types
454
/** Numeric cache counters returned by `getCacheStats`. */
export interface CacheStats {
  hits: number;
  misses: number;
  size: number;
  /** NOTE(review): whether this is a 0-1 fraction or a percentage is not stated here. */
  hitRate: number;
}
460
+
461
+ // Cache Functions
462
+ export function initCache(cacheDir?: string): void;
463
+ export function generateCacheKey(imagePath: string, prompt: string, context?: ValidationContext): string;
464
+ export function getCached(imagePath: string, prompt: string, context?: ValidationContext): ValidationResult | null;
465
+ export function setCached(
466
+ imagePath: string,
467
+ prompt: string,
468
+ context: ValidationContext,
469
+ result: ValidationResult
470
+ ): void;
471
+ export function clearCache(): void;
472
+ export function getCacheStats(): CacheStats;
473
+
474
+ // Config Functions
475
+ export function createConfig(options?: ConfigOptions): Config;
476
+ export function getConfig(): Config;
477
+ export function setConfig(config: Config): void;
478
+ export function getProvider(providerName?: string | null): Config['providerConfig'];
479
+
480
+ // Utility Functions
481
+ export function loadEnv(basePath?: string | null): void;
482
+ export function initErrorHandlers(): void;
483
+
484
+ // ScoreTracker Class
485
/**
 * Declares the score-tracking API: record a score under a test name, read
 * the baseline or current score, and compare a score against the baseline.
 */
export class ScoreTracker {
  /** @param options - Optional baseline directory and auto-save flag. */
  constructor(options?: { baselineDir?: string; autoSave?: boolean });

  /** Records `score` under `testName` and returns the stored entry. */
  record(
    testName: string,
    score: number,
    metadata?: Record<string, unknown>
  ): { score: number; timestamp: string; metadata: Record<string, unknown> };

  /** Baseline score for `testName`, or `null`. */
  getBaseline(testName: string): number | null;

  /** Current score for `testName`, or `null`. */
  getCurrent(testName: string): number | null;

  /** Comparison of `currentScore` with the baseline; the result may be `null`. */
  compare(
    testName: string,
    currentScore: number
  ): {
    hasBaseline: boolean;
    baseline: number | null;
    current: number;
    improved: boolean;
    delta: number;
    percentage: number;
    regression?: boolean;
    trend?: string;
    history?: Array<{ score: number; timestamp: string; metadata?: Record<string, unknown> }>;
  } | null;

  /** Updates the baseline for `testName`; `newBaseline` is optional and may be `null`. */
  updateBaseline(testName: string, newBaseline?: number | null): boolean;

  /** All tracked entries, keyed by string. */
  getAll(): Record<
    string,
    {
      history: Array<{ score: number; timestamp: string; metadata?: Record<string, unknown> }>;
      current: number | null;
      baseline: number | null;
      firstRecorded: string;
      lastUpdated: string;
      baselineSetAt?: string;
    }
  >;

  /** Summary statistics; the trailing aggregate counters are optional. */
  getStats(): {
    current: number | null;
    baseline: number | null;
    // NOTE(review): `timestamp` is `number` here but `string` in `record`,
    // `compare` and `getAll` - confirm which one the implementation emits.
    history: Array<{ score: number; timestamp: number; metadata?: Record<string, unknown> }>;
    average: number | null;
    min: number | null;
    max: number | null;
    totalTests?: number;
    testsWithBaselines?: number;
    testsWithRegressions?: number;
    testsWithImprovements?: number;
    averageScore?: number;
    averageBaseline?: number;
  };
}
508
+
509
+ // BatchOptimizer Class
510
+ export class BatchOptimizer {
511
+ constructor(options?: { maxConcurrency?: number; batchSize?: number; cacheEnabled?: boolean });
512
+ batchValidate(imagePaths: string | string[], prompt: string, context?: ValidationContext): Promise<ValidationResult[]>;
513
+ clearCache(): void;
514
+ getCacheStats(): { cacheSize: number; queueLength: number; activeRequests: number };
515
+ }
516
+
517
+ // Data Extractor
518
+ export function extractStructuredData(
519
+ text: string,
520
+ schema: object,
521
+ options?: {
522
+ method?: 'json' | 'llm' | 'regex';
523
+ provider?: string;
524
+ apiKey?: string;
525
+ }
526
+ ): Promise<unknown>;
527
+
528
+ // Feedback Aggregator
529
+ export interface AggregatedFeedback {
530
+ averageScore: number;
531
+ totalIssues: number;
532
+ commonIssues: Array<{ issue: string; count: number }>;
533
+ scoreDistribution: Record<string, number>;
534
+ recommendations: string[];
535
+ }
536
+
537
+ export function aggregateFeedback(judgeResults: ValidationResult[]): AggregatedFeedback;
538
+ export function generateRecommendations(aggregated: AggregatedFeedback): string[];
539
+
540
+ // Context Compressor
541
+ export function compressContext(
542
+ notes: TemporalNote[],
543
+ options?: {
544
+ maxLength?: number;
545
+ preserveImportant?: boolean;
546
+ }
547
+ ): TemporalNote[];
548
+
549
+ export function compressStateHistory(
550
+ stateHistory: Array<Record<string, unknown>>,
551
+ options?: {
552
+ maxLength?: number;
553
+ preserveImportant?: boolean;
554
+ }
555
+ ): Array<Record<string, unknown>>;
556
+
557
+ // Persona Experience
558
+ export interface PersonaExperienceOptions {
559
+ viewport?: { width: number; height: number };
560
+ device?: string;
561
+ darkMode?: boolean;
562
+ timeScale?: 'human' | 'mechanical';
563
+ captureScreenshots?: boolean;
564
+ captureState?: boolean;
565
+ captureCode?: boolean;
566
+ notes?: TemporalNote[];
567
+ }
568
+
569
+ export interface PersonaExperienceResult {
570
+ persona: Persona;
571
+ notes: TemporalNote[];
572
+ screenshots: TemporalScreenshot[];
573
+ renderedCode?: RenderedCode;
574
+ gameState?: Record<string, unknown>;
575
+ evaluation?: ValidationResult;
576
+ timestamp: number;
577
+ }
578
+
579
+ export function experiencePageAsPersona(
580
+ page: any,
581
+ persona: Persona,
582
+ options?: PersonaExperienceOptions
583
+ ): Promise<PersonaExperienceResult>;
584
+
585
+ export function experiencePageWithPersonas(
586
+ page: any,
587
+ personas: Persona[],
588
+ options?: PersonaExperienceOptions
589
+ ): Promise<PersonaExperienceResult[]>;
590
+
591
+ // Type Guards
592
+ export function isObject<T>(value: unknown): value is Record<string, T>;
593
+ export function isString(value: unknown): value is string;
594
+ export function isNumber(value: unknown): value is number;
595
+ export function isPositiveInteger(value: unknown): value is number;
596
+ export function isNonEmptyString(value: unknown): value is string;
597
+ export function isArray<T>(value: unknown): value is T[];
598
+ export function isFunction(value: unknown): value is Function;
599
+ export function isPromise<T>(value: unknown): value is Promise<T>;
600
+ export function isValidationResult(value: unknown): value is ValidationResult;
601
+ export function isValidationContext(value: unknown): value is ValidationContext;
602
+ export function isPersona(value: unknown): value is Persona;
603
+ export function isTemporalNote(value: unknown): value is TemporalNote;
604
+
605
+ // Type Assertions
606
+ export function assertObject<T>(value: unknown, name?: string): asserts value is Record<string, T>;
607
+ export function assertString(value: unknown, name?: string): asserts value is string;
608
+ export function assertNonEmptyString(value: unknown, name?: string): asserts value is string;
609
+ export function assertNumber(value: unknown, name?: string): asserts value is number;
610
+ export function assertArray<T>(value: unknown, name?: string): asserts value is T[];
611
+ export function assertFunction(value: unknown, name?: string): asserts value is Function;
612
+
613
+ // Utility Functions
614
+ export function pick<T, K extends keyof T>(obj: T, keys: K[]): Pick<T, K>;
615
+ export function getProperty<T, D>(obj: T, key: string, defaultValue: D): T[keyof T] | D;
616
+
617
+ // Experience Tracer
618
+ export class ExperienceTrace {
619
+ constructor(sessionId: string, persona?: Persona | null);
620
+ sessionId: string;
621
+ persona: Persona | null;
622
+ startTime: number;
623
+ events: Array<Record<string, unknown>>;
624
+ validations: Array<Record<string, unknown>>;
625
+ screenshots: Array<Record<string, unknown>>;
626
+ stateHistory: Array<Record<string, unknown>>;
627
+ aggregatedNotes: AggregatedTemporalNotes | null;
628
+ metaEvaluation: Record<string, unknown> | null;
629
+
630
+ addEvent(type: string, data: Record<string, unknown>, timestamp?: number | null): Record<string, unknown>;
631
+ addValidation(validation: ValidationResult, context?: Record<string, unknown>): Record<string, unknown>;
632
+ addScreenshot(path: string, step: string, metadata?: Record<string, unknown>): Record<string, unknown>;
633
+ addStateSnapshot(state: Record<string, unknown>, label?: string): Record<string, unknown>;
634
+ aggregateNotes(
635
+ aggregateTemporalNotes: (notes: TemporalNote[], options?: Record<string, unknown>) => AggregatedTemporalNotes,
636
+ options?: Record<string, unknown>
637
+ ): AggregatedTemporalNotes;
638
+ getSummary(): Record<string, unknown>;
639
+ getFullTrace(): Record<string, unknown>;
640
+ exportToJSON(filePath: string): Promise<void>;
641
+ }
642
+
643
+ export class ExperienceTracerManager {
644
+ constructor();
645
+ createTrace(sessionId: string, persona?: Persona | null): ExperienceTrace;
646
+ getTrace(sessionId: string): ExperienceTrace | null;
647
+ getAllTraces(): ExperienceTrace[];
648
+ metaEvaluateTrace(
649
+ sessionId: string,
650
+ validateScreenshot: ValidationFunction
651
+ ): Promise<Record<string, unknown>>;
652
+ getMetaEvaluationSummary(): {
653
+ totalEvaluations: number;
654
+ averageQuality: number | null;
655
+ evaluations?: Array<Record<string, unknown>>;
656
+ };
657
+ }
658
+
659
+ export function getTracerManager(): ExperienceTracerManager;
660
+
661
+ // Position Counter-Balance
662
+ export interface CounterBalanceOptions {
663
+ enabled?: boolean;
664
+ baselinePath?: string | null;
665
+ contextOrder?: 'original' | 'reversed';
666
+ }
667
+
668
+ export interface CounterBalancedResult extends ValidationResult {
669
+ counterBalanced: boolean;
670
+ originalScore: number | null;
671
+ reversedScore: number | null;
672
+ scoreDifference: number | null;
673
+ metadata: {
674
+ counterBalancing: {
675
+ enabled: boolean;
676
+ originalResult: ValidationResult;
677
+ reversedResult: ValidationResult;
678
+ positionBiasDetected: boolean;
679
+ };
680
+ };
681
+ }
682
+
683
+ export function evaluateWithCounterBalance(
684
+ evaluateFn: ValidationFunction<ValidationResult>,
685
+ imagePath: string,
686
+ prompt: string,
687
+ context?: ValidationContext,
688
+ options?: CounterBalanceOptions
689
+ ): Promise<CounterBalancedResult>;
690
+
691
+ export function shouldUseCounterBalance(context: ValidationContext): boolean;
692
+
693
+ // Dynamic Few-Shot Examples
694
+ export interface FewShotExample {
695
+ description?: string;
696
+ evaluation?: string;
697
+ score?: number | null;
698
+ screenshot?: string;
699
+ quality?: string;
700
+ result?: {
701
+ score?: number | null;
702
+ reasoning?: string;
703
+ };
704
+ json?: unknown;
705
+ }
706
+
707
+ export interface FewShotOptions {
708
+ maxExamples?: number;
709
+ similarityThreshold?: number;
710
+ useSemanticMatching?: boolean;
711
+ }
712
+
713
+ export function selectFewShotExamples(
714
+ prompt: string,
715
+ examples?: FewShotExample[],
716
+ options?: FewShotOptions
717
+ ): FewShotExample[];
718
+
719
+ export function formatFewShotExamples(
720
+ examples: FewShotExample[],
721
+ format?: 'default' | 'json'
722
+ ): string;
723
+
724
+ // Metrics
725
+ export function spearmanCorrelation(
726
+ x: Array<number | null>,
727
+ y: Array<number | null>
728
+ ): number | null;
729
+
730
+ export function pearsonCorrelation(
731
+ x: Array<number | null>,
732
+ y: Array<number | null>
733
+ ): number | null;
734
+
735
/**
 * Result shape returned by `calculateRankAgreement`. The three coefficient
 * members may be `null`; the counts and the rate are always numbers.
 */
export interface RankAgreementResult {
  spearman: null | number;
  pearson: null | number;
  kendall: null | number;
  exactMatches: number;
  totalItems: number;
  agreementRate: number;
}
743
+
744
+ export function calculateRankAgreement(
745
+ ranking1: Array<number | null>,
746
+ ranking2: Array<number | null>
747
+ ): RankAgreementResult;
748
+
749
+ // Validators
750
+ export interface StateValidatorOptions<T = unknown> {
751
+ tolerance?: number;
752
+ validateScreenshot?: ValidationFunction;
753
+ stateExtractor?: (result: ValidationResult, expected: T) => Partial<T>;
754
+ stateComparator?: (extracted: Partial<T>, expected: T, options: { tolerance: number }) => {
755
+ matches: boolean;
756
+ discrepancies: string[];
757
+ };
758
+ }
759
+
760
+ export interface StateValidationOptions<T = unknown> {
761
+ promptBuilder?: (expected: T, options: Record<string, unknown>) => string;
762
+ testType?: string;
763
+ context?: Record<string, unknown>;
764
+ stateDescription?: string;
765
+ extractionTasks?: string[];
766
+ }
767
+
768
+ export interface StateValidationResult<T = unknown> extends ValidationResult {
769
+ extractedState: Partial<T>;
770
+ expectedState: T;
771
+ validation: {
772
+ matches: boolean;
773
+ discrepancies: string[];
774
+ };
775
+ matches: boolean;
776
+ }
777
+
778
+ export class StateValidator<T = unknown> {
779
+ constructor(options?: StateValidatorOptions<T>);
780
+ static validate<T = unknown>(
781
+ screenshotPath: string | string[],
782
+ expectedState: T,
783
+ options?: StateValidationOptions<T>
784
+ ): Promise<StateValidationResult<T>>;
785
+ validateState(
786
+ screenshotPath: string | string[],
787
+ expectedState: T,
788
+ options?: StateValidationOptions<T>
789
+ ): Promise<StateValidationResult<T>>;
790
+ buildStatePrompt(expectedState: T, options?: StateValidationOptions<T>): string;
791
+ }
792
+
793
+ export interface AccessibilityValidatorOptions {
794
+ minContrast?: number;
795
+ standards?: string[];
796
+ zeroTolerance?: boolean;
797
+ validateScreenshot?: ValidationFunction;
798
+ }
799
+
800
+ export interface AccessibilityOptions {
801
+ customPrompt?: string;
802
+ minContrast?: number;
803
+ standards?: string[];
804
+ testType?: string;
805
+ [key: string]: unknown;
806
+ }
807
+
808
+ export interface AccessibilityResult extends ValidationResult {
809
+ violations: {
810
+ zeroTolerance: string[];
811
+ critical: string[];
812
+ warnings: string[];
813
+ };
814
+ passes: boolean;
815
+ contrastCheck: {
816
+ ratios: string[];
817
+ minRatio: number | null;
818
+ meetsRequirement: boolean | null;
819
+ };
820
+ standards: string[];
821
+ }
822
+
823
+ /** Accessibility validator: a static validate, an instance validateAccessibility, a prompt builder, and two helpers that take a ValidationResult. */ export class AccessibilityValidator {
824
+ constructor(options?: AccessibilityValidatorOptions); // options may be omitted
825
+ static validate( // static entry point; same parameters and result type as validateAccessibility
826
+ screenshotPath: string | string[], // a single path or an array of paths
827
+ options?: AccessibilityOptions
828
+ ): Promise<AccessibilityResult>;
829
+ validateAccessibility( // instance method; resolves to an AccessibilityResult
830
+ screenshotPath: string | string[],
831
+ options?: AccessibilityOptions
832
+ ): Promise<AccessibilityResult>;
833
+ buildAccessibilityPrompt(options?: AccessibilityOptions): string; // synchronous; returns a string (presumably the prompt text - confirm)
834
+ detectViolations(result: ValidationResult): { // synchronous; return shape matches AccessibilityResult.violations
835
+ zeroTolerance: string[];
836
+ critical: string[];
837
+ warnings: string[];
838
+ };
839
+ extractContrastInfo(result: ValidationResult): { // synchronous; return shape matches AccessibilityResult.contrastCheck
840
+ ratios: string[];
841
+ minRatio: number | null;
842
+ meetsRequirement: boolean | null;
843
+ };
844
+ }
845
+
846
+ /** Function form of a prompt template: takes a variables map and an optional context map, and returns a string. */ export type PromptTemplate = (variables: Record<string, unknown>, context?: Record<string, unknown>) => string;
847
+
848
+ /** Options accepted by the PromptBuilder constructor; every field is optional. */ export interface PromptBuilderOptions {
849
+ templates?: Record<string, PromptTemplate | string>; // keyed by string; each entry is either a PromptTemplate function or a plain string
850
+ rubric?: Rubric; // Rubric is not defined in this block
851
+ defaultContext?: Record<string, unknown>;
852
+ }
853
+
854
+ /** Per-call options for PromptBuilder.buildPrompt and buildFromTemplate; every field is optional. */ export interface PromptOptions {
855
+ variables?: Record<string, unknown>;
856
+ context?: Record<string, unknown>;
857
+ includeRubric?: boolean;
858
+ includeZeroTolerance?: boolean; // same flag name as on RubricOptions
859
+ includeScoring?: boolean; // same flag name as on RubricOptions
860
+ enforceZeroTolerance?: boolean; // same flag name as on RubricOptions
861
+ rubric?: Rubric; // NOTE(review): presumably overrides the rubric given to the PromptBuilder constructor - confirm
862
+ }
863
+
864
+ /** Prompt builder: two synchronous string-returning build methods plus template registration. */ export class PromptBuilder {
865
+ constructor(options?: PromptBuilderOptions); // options may be omitted
866
+ buildPrompt(basePrompt: string, options?: PromptOptions): string; // takes a base prompt string and returns a string
867
+ buildFromTemplate(templateName: string, variables?: Record<string, unknown>, options?: PromptOptions): string; // template is selected by name; variables and options are optional
868
+ registerTemplate(name: string, template: PromptTemplate | string): void; // accepts the same function-or-string union as PromptBuilderOptions.templates
869
+ }
870
+
871
+ /** Options for validateWithRubric: three optional boolean flags, which also appear by the same names on PromptOptions. */ export interface RubricOptions {
872
+ enforceZeroTolerance?: boolean;
873
+ includeZeroTolerance?: boolean;
874
+ includeScoring?: boolean;
875
+ }
876
+
877
+ /** One entry of ExtendedRubric.criteria; id and rule are required, the remaining fields are optional. */ export interface RubricCriterion {
878
+ id: string;
879
+ rule: string;
880
+ weight?: number;
881
+ zeroTolerance?: boolean;
882
+ penalty?: number;
883
+ description?: string;
884
+ }
885
+
886
+ /** Rubric extended with an optional criteria list, name and description; this is the rubric type validateWithRubric accepts. */ export interface ExtendedRubric extends Rubric {
887
+ criteria?: RubricCriterion[];
888
+ name?: string;
889
+ description?: string;
890
+ }
891
+
892
+ /** Asynchronous rubric-based validation; resolves to a ValidationResult that may additionally carry zeroToleranceViolation. */ export function validateWithRubric(
893
+ screenshotPath: string, // a single path only; unlike the validator classes, no string[] form is declared here
894
+ prompt: string,
895
+ rubric: ExtendedRubric, // required
896
+ context?: ValidationContext,
897
+ options?: RubricOptions
898
+ ): Promise<ValidationResult & { zeroToleranceViolation?: boolean }>;
899
+
900
+ /** Options accepted by the BatchValidator constructor; every field is optional. */ export interface BatchValidatorOptions {
901
+ maxConcurrency?: number;
902
+ batchSize?: number;
903
+ cacheEnabled?: boolean;
904
+ trackCosts?: boolean; // NOTE(review): presumably controls whether BatchValidationStats.costStats is populated - confirm
905
+ trackStats?: boolean; // NOTE(review): presumably controls whether BatchValidationResult.stats is populated - confirm
906
+ }
907
+
908
+ /** Statistics object carried by BatchValidationResult.stats. */ export interface BatchValidationStats {
909
+ total: number;
910
+ passed: number;
911
+ failed: number;
912
+ duration: number; // units are not stated in the declaration - TODO confirm
913
+ costStats: ReturnType<CostTracker['getStats']> | null; // type is derived from CostTracker.getStats; null is allowed
914
+ performance: { // same shape as the return type of BatchValidator.getPerformanceStats; the whole object may be null
915
+ totalRequests: number;
916
+ avgDuration: number;
917
+ minDuration: number;
918
+ maxDuration: number;
919
+ successRate: number; // scale (fraction vs percentage) is not stated - TODO confirm
920
+ } | null;
921
+ }
922
+
923
+ /** Value that BatchValidator.batchValidate resolves to. */ export interface BatchValidationResult {
924
+ results: ValidationResult[];
925
+ stats: BatchValidationStats | null; // null is allowed; NOTE(review): presumably null when stats tracking is off - confirm
926
+ }
927
+
928
+ /** Subclass of BatchOptimizer that adds batch validation plus cost and performance statistics accessors. */ export class BatchValidator extends BatchOptimizer {
929
+ constructor(options?: BatchValidatorOptions); // options may be omitted
930
+ batchValidate( // asynchronous; resolves to results plus (nullable) stats
931
+ screenshots: string | string[], // a single path or an array of paths
932
+ prompt: string,
933
+ context?: ValidationContext
934
+ ): Promise<BatchValidationResult>;
935
+ getCostStats(): ReturnType<CostTracker['getStats']>; // not nullable here, unlike BatchValidationStats.costStats
936
+ getPerformanceStats(): { // synchronous; same field set as BatchValidationStats.performance, but never null in this signature
937
+ totalRequests: number;
938
+ avgDuration: number;
939
+ minDuration: number;
940
+ maxDuration: number;
941
+ successRate: number;
942
+ };
943
+ resetStats(): void;
944
+ }
945
+
946
+ // Programmatic Validators (fast, deterministic)
947
+ // Use these when you have Playwright page access and need fast feedback (<100ms)
948
+
949
+ /**
950
+ * Calculate the contrast ratio between two colors (WCAG algorithm).
951
+ * Synchronous: the declared return type is a plain number, not a Promise.
952
+ * @param color1 - First color (rgb, rgba, or hex)
953
+ * @param color2 - Second color (rgb, rgba, or hex)
954
+ * @returns Contrast ratio; the WCAG formula yields values from 1 (identical colors) to 21 (black on white)
955
+ */
956
+ export function getContrastRatio(color1: string, color2: string): number;
957
+
958
+ /**
958
+ * Contrast check result for a single element, as resolved by checkElementContrast.
959
+ */
960
+ export interface ElementContrastResult {
961
+ ratio: number;
962
+ passes: boolean;
963
+ foreground: string;
964
+ background: string;
965
+ foregroundRgb?: [number, number, number]; // optional three-number tuple
966
+ backgroundRgb?: [number, number, number]; // optional three-number tuple
967
+ error?: string; // optional; NOTE(review): presumably set when the check could not be performed - confirm
968
+ selector?: string;
969
+ }
971
+
972
+ /**
972
+ * Check contrast ratio for an element. Asynchronous: resolves to an ElementContrastResult.
973
+ *
974
+ * @param page - Playwright page object (declared as `any`, so the argument is not type-checked)
975
+ * @param selector - CSS selector for element
976
+ * @param minRatio - Optional minimum required contrast ratio (default: 4.5 for WCAG-AA)
977
+ * @returns Contrast check result
978
+ */
979
+ export function checkElementContrast(
980
+ page: any,
981
+ selector: string,
982
+ minRatio?: number
983
+ ): Promise<ElementContrastResult>;
985
+
986
+ /**
986
+ * Text contrast check result for all text elements, as resolved by checkAllTextContrast.
987
+ */
988
+ export interface AllTextContrastResult {
989
+ total: number;
990
+ passing: number;
991
+ failing: number;
992
+ violations: Array<{ // required list
993
+ element: string;
994
+ ratio: string; // NOTE(review): a string here, but a number in elements[].ratio below - confirm this difference is intended
995
+ required: number;
996
+ foreground: string;
997
+ background: string;
998
+ }>;
999
+ elements?: Array<{ // optional per-element detail list
1000
+ tag: string;
1001
+ id: string;
1002
+ className: string;
1003
+ ratio: number;
1004
+ passes: boolean;
1005
+ foreground: string;
1006
+ background: string;
1007
+ }>;
1008
+ }
1010
+
1011
+ /**
1011
+ * Check contrast for all text elements on page. Asynchronous: resolves to an AllTextContrastResult.
1012
+ *
1013
+ * @param page - Playwright page object (declared as `any`, so the argument is not type-checked)
1014
+ * @param minRatio - Optional minimum required contrast ratio (default: 4.5 for WCAG-AA)
1015
+ * @returns Contrast check results for all text elements
1016
+ */
1017
+ export function checkAllTextContrast(
1018
+ page: any,
1019
+ minRatio?: number
1020
+ ): Promise<AllTextContrastResult>;
1022
+
1023
+ /**
1023
+ * Keyboard navigation check result, as resolved by checkKeyboardNavigation.
1024
+ */
1025
+ export interface KeyboardNavigationResult {
1026
+ keyboardAccessible: boolean;
1027
+ focusableElements: number; // a number, not a list; NOTE(review): presumably a count of focusable elements - confirm
1028
+ violations: Array<{
1029
+ element: string;
1030
+ issue: string;
1031
+ }>;
1032
+ focusableSelectors: string[];
1033
+ }
1035
+
1036
+ /**
1036
+ * Check keyboard navigation accessibility. Asynchronous: resolves to a KeyboardNavigationResult.
1037
+ *
1038
+ * @param page - Playwright page object (declared as `any`, so the argument is not type-checked)
1039
+ * @returns Keyboard navigation check result
1040
+ */
1041
+ export function checkKeyboardNavigation(page: any): Promise<KeyboardNavigationResult>;
1043
+
1044
+ /**
1044
+ * Programmatic state validation options (every field is optional); accepted by validateStateProgrammatic and, intersected with ValidationContext, by validateStateHybrid.
1045
+ */
1046
+ export interface ProgrammaticStateOptions {
1047
+ selectors?: Record<string, string>; // string-to-string map; NOTE(review): presumably state key to CSS selector - confirm
1048
+ tolerance?: number; // units are not stated in the declaration - TODO confirm
1049
+ stateExtractor?: (page: any) => Promise<unknown>; // optional async callback that receives the page; its resolved value is typed unknown
1050
+ }
1052
+
1053
+ /**
1053
+ * Programmatic state validation result, as resolved by validateStateProgrammatic.
1054
+ */
1055
+ export interface ProgrammaticStateResult {
1056
+ matches: boolean;
1057
+ discrepancies: string[];
1058
+ visualState: Record<string, { // string-keyed map; each entry is a rectangle-plus-visibility object or null
1059
+ x: number;
1060
+ y: number;
1061
+ width: number;
1062
+ height: number;
1063
+ visible: boolean;
1064
+ } | null>; // NOTE(review): null presumably means the element was not found - confirm
1065
+ expectedState: Record<string, unknown>; // same type as the expectedState argument of validateStateProgrammatic
1066
+ gameState?: unknown; // optional and untyped
1067
+ }
1069
+
1070
+ /**
1070
+ * Validate state matches visual representation. Asynchronous: resolves to a ProgrammaticStateResult.
1071
+ *
1072
+ * @param page - Playwright page object (declared as `any`, so the argument is not type-checked)
1073
+ * @param expectedState - Expected state object (string-keyed, values typed unknown)
1074
+ * @param options - Optional validation options
1075
+ * @returns State validation result
1076
+ */
1077
+ export function validateStateProgrammatic(
1078
+ page: any,
1079
+ expectedState: Record<string, unknown>,
1080
+ options?: ProgrammaticStateOptions
1081
+ ): Promise<ProgrammaticStateResult>;
1083
+
1084
+ /**
1084
+ * Element position validation result, as resolved by validateElementPosition.
1085
+ */
1086
+ export interface ElementPositionResult {
1087
+ matches: boolean;
1088
+ actual: { // all four fields required
1089
+ x: number;
1090
+ y: number;
1091
+ width: number;
1092
+ height: number;
1093
+ };
1094
+ expected: { // all four fields optional, mirroring the expectedPosition parameter of validateElementPosition
1095
+ x?: number;
1096
+ y?: number;
1097
+ width?: number;
1098
+ height?: number;
1099
+ };
1100
+ diff: { // NOTE(review): x and y are required here although expected.x / expected.y are optional - confirm what is reported when they are omitted
1101
+ x: number;
1102
+ y: number;
1103
+ width?: number;
1104
+ height?: number;
1105
+ };
1106
+ tolerance: number;
1107
+ error?: string; // optional; NOTE(review): presumably set when the element could not be measured - confirm
1108
+ selector?: string;
1109
+ }
1111
+
1112
+ /**
1112
+ * Validate element position matches expected position. Asynchronous: resolves to an ElementPositionResult.
1113
+ *
1114
+ * @param page - Playwright page object (declared as `any`, so the argument is not type-checked)
1115
+ * @param selector - CSS selector for element
1116
+ * @param expectedPosition - Expected position {x, y} or {x, y, width, height}; note the declared type makes all four fields optional
1117
+ * @param tolerance - Optional pixel tolerance (default: 5)
1118
+ * @returns Position validation result
1119
+ */
1120
+ export function validateElementPosition(
1121
+ page: any,
1122
+ selector: string,
1123
+ expectedPosition: {
1124
+ x?: number;
1125
+ y?: number;
1126
+ width?: number;
1127
+ height?: number;
1128
+ },
1129
+ tolerance?: number
1130
+ ): Promise<ElementPositionResult>;
1132
+
1133
+ // Hybrid Validators (Programmatic + VLLM)
1134
+ // Combine programmatic data with semantic LLM evaluation
1135
+
1136
+ /**
1136
+ * Hybrid accessibility validation result: a ValidationResult plus the programmatic contrast and keyboard check results.
1137
+ */
1138
+ export interface AccessibilityHybridResult extends ValidationResult {
1139
+ programmaticData: { // required; both members are required
1140
+ contrast: AllTextContrastResult; // same type that checkAllTextContrast resolves to
1141
+ keyboard: KeyboardNavigationResult; // same type that checkKeyboardNavigation resolves to
1142
+ };
1143
+ }
1145
+
1146
+ /**
1146
+ * Hybrid accessibility validation
1147
+ * Combines programmatic contrast/keyboard checks with VLLM semantic evaluation
1148
+ * Asynchronous: resolves to an AccessibilityHybridResult.
1149
+ * @param page - Playwright page object (declared as `any`, so the argument is not type-checked)
1150
+ * @param screenshotPath - Path to screenshot (a single string)
1151
+ * @param minContrast - Optional minimum contrast ratio (default: 4.5)
1152
+ * @param options - Optional validation options, typed as ValidationContext
1153
+ * @returns Hybrid validation result with programmatic data
1154
+ */
1155
+ export function validateAccessibilityHybrid(
1156
+ page: any,
1157
+ screenshotPath: string,
1158
+ minContrast?: number,
1159
+ options?: ValidationContext
1160
+ ): Promise<AccessibilityHybridResult>;
1162
+
1163
+ /**
1163
+ * Hybrid state validation result: a ValidationResult plus programmatic state data.
1164
+ */
1165
+ export interface StateHybridResult extends ValidationResult {
1166
+ programmaticData: { // carries the same gameState, visualState, discrepancies and matches fields as ProgrammaticStateResult (expectedState is not included)
1167
+ gameState?: unknown; // optional and untyped
1168
+ visualState: Record<string, { // string-keyed map; each entry is a rectangle-plus-visibility object or null
1169
+ x: number;
1170
+ y: number;
1171
+ width: number;
1172
+ height: number;
1173
+ visible: boolean;
1174
+ } | null>;
1175
+ discrepancies: string[];
1176
+ matches: boolean;
1177
+ };
1178
+ }
1180
+
1181
+ /**
1181
+ * Hybrid state validation
1182
+ * Combines programmatic state extraction with VLLM semantic evaluation
1183
+ * Asynchronous: resolves to a StateHybridResult.
1184
+ * @param page - Playwright page object (declared as `any`, so the argument is not type-checked)
1185
+ * @param screenshotPath - Path to screenshot (a single string)
1186
+ * @param expectedState - Expected state object (string-keyed, values typed unknown)
1187
+ * @param options - Optional validation options; an intersection of ProgrammaticStateOptions and ValidationContext
1188
+ * @returns Hybrid validation result with programmatic data
1189
+ */
1190
+ export function validateStateHybrid(
1191
+ page: any,
1192
+ screenshotPath: string,
1193
+ expectedState: Record<string, unknown>,
1194
+ options?: ProgrammaticStateOptions & ValidationContext
1195
+ ): Promise<StateHybridResult>;
1197
+
1198
+ /**
1198
+ * Generic hybrid validator result: a ValidationResult plus an untyped programmatic data map, as resolved by validateWithProgrammaticContext.
1199
+ */
1200
+ export interface HybridValidationResult extends ValidationResult {
1201
+ programmaticData: Record<string, unknown>; // same type as the programmaticData argument of validateWithProgrammaticContext
1202
+ }
1204
+
1205
+ /**
1205
+ * Generic hybrid validator helper
1206
+ * Combines any programmatic data with VLLM evaluation
1207
+ * Asynchronous: resolves to a HybridValidationResult. Takes no page argument, unlike the other hybrid validators.
1208
+ * @param screenshotPath - Path to screenshot (a single string)
1209
+ * @param prompt - Base evaluation prompt
1210
+ * @param programmaticData - Programmatic validation data (string-keyed, values typed unknown)
1211
+ * @param options - Optional validation options, typed as ValidationContext
1212
+ * @returns Hybrid validation result with programmatic data
1213
+ */
1214
+ export function validateWithProgrammaticContext(
1215
+ screenshotPath: string,
1216
+ prompt: string,
1217
+ programmaticData: Record<string, unknown>,
1218
+ options?: ValidationContext
1219
+ ): Promise<HybridValidationResult>;
1221
+