@arclabs561/ai-visual-test 0.5.1 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +127 -11
- package/DEPLOYMENT.md +225 -9
- package/README.md +71 -80
- package/index.d.ts +902 -5
- package/package.json +10 -51
- package/src/batch-optimizer.mjs +39 -0
- package/src/cache.mjs +241 -16
- package/src/config.mjs +33 -91
- package/src/constants.mjs +54 -0
- package/src/convenience.mjs +113 -10
- package/src/cost-optimization.mjs +1 -0
- package/src/cost-tracker.mjs +134 -2
- package/src/data-extractor.mjs +36 -7
- package/src/dynamic-few-shot.mjs +69 -11
- package/src/errors.mjs +6 -2
- package/src/experience-propagation.mjs +12 -0
- package/src/experience-tracer.mjs +12 -3
- package/src/game-player.mjs +222 -43
- package/src/graceful-shutdown.mjs +126 -0
- package/src/helpers/playwright.mjs +22 -8
- package/src/human-validation-manager.mjs +99 -2
- package/src/index.mjs +48 -3
- package/src/integrations/playwright.mjs +140 -0
- package/src/judge.mjs +699 -24
- package/src/load-env.mjs +2 -1
- package/src/logger.mjs +31 -3
- package/src/model-tier-selector.mjs +1 -221
- package/src/natural-language-specs.mjs +31 -3
- package/src/persona-enhanced.mjs +4 -2
- package/src/persona-experience.mjs +1 -1
- package/src/pricing.mjs +28 -0
- package/src/prompt-composer.mjs +162 -5
- package/src/provider-data.mjs +115 -0
- package/src/render-change-detector.mjs +5 -0
- package/src/research-enhanced-validation.mjs +7 -5
- package/src/retry.mjs +21 -7
- package/src/rubrics.mjs +4 -0
- package/src/safe-logger.mjs +71 -0
- package/src/session-cost-tracker.mjs +320 -0
- package/src/smart-validator.mjs +8 -8
- package/src/spec-templates.mjs +52 -6
- package/src/startup-validation.mjs +127 -0
- package/src/temporal-adaptive.mjs +2 -2
- package/src/temporal-decision-manager.mjs +1 -271
- package/src/temporal-logic.mjs +104 -0
- package/src/temporal-note-pruner.mjs +119 -0
- package/src/temporal-preprocessor.mjs +1 -543
- package/src/temporal.mjs +681 -79
- package/src/utils/action-hallucination-detector.mjs +301 -0
- package/src/utils/baseline-validator.mjs +82 -0
- package/src/utils/cache-stats.mjs +104 -0
- package/src/utils/cached-llm.mjs +164 -0
- package/src/utils/capability-stratifier.mjs +108 -0
- package/src/utils/counterfactual-tester.mjs +83 -0
- package/src/utils/error-recovery.mjs +117 -0
- package/src/utils/explainability-scorer.mjs +119 -0
- package/src/utils/exploratory-automation.mjs +131 -0
- package/src/utils/index.mjs +10 -0
- package/src/utils/intent-recognizer.mjs +201 -0
- package/src/utils/log-sanitizer.mjs +165 -0
- package/src/utils/path-validator.mjs +88 -0
- package/src/utils/performance-logger.mjs +316 -0
- package/src/utils/performance-measurement.mjs +280 -0
- package/src/utils/prompt-sanitizer.mjs +213 -0
- package/src/utils/rate-limiter.mjs +144 -0
- package/src/validation-framework.mjs +24 -20
- package/src/validation-result-normalizer.mjs +35 -1
- package/src/validation.mjs +75 -25
- package/src/validators/accessibility-validator.mjs +144 -0
- package/src/validators/hybrid-validator.mjs +48 -4
- package/api/health.js +0 -34
- package/api/validate.js +0 -252
- package/public/index.html +0 -149
- package/vercel.json +0 -27
package/index.d.ts
CHANGED
|
@@ -210,26 +210,126 @@ export interface EnsembleResult {
|
|
|
210
210
|
votingMethod: string;
|
|
211
211
|
}
|
|
212
212
|
|
|
213
|
+
/**
|
|
214
|
+
* Ensemble Judge
|
|
215
|
+
*
|
|
216
|
+
* Uses multiple LLM providers to evaluate screenshots and aggregates results
|
|
217
|
+
* for improved accuracy (10-20% improvement with 3+ models).
|
|
218
|
+
*
|
|
219
|
+
* **Research:** Based on arXiv:2510.01499 - "Optimal LLM Aggregation"
|
|
220
|
+
*
|
|
221
|
+
* **Use when:** You need maximum reliability for critical evaluations
|
|
222
|
+
* (accessibility, quality checks, design validation).
|
|
223
|
+
*
|
|
224
|
+
* @example
|
|
225
|
+
* ```typescript
|
|
226
|
+
* const judge = new EnsembleJudge({
|
|
227
|
+
* judges: [
|
|
228
|
+
* new VLLMJudge({ provider: 'gemini' }),
|
|
229
|
+
* new VLLMJudge({ provider: 'openai' }),
|
|
230
|
+
* new VLLMJudge({ provider: 'claude' })
|
|
231
|
+
* ],
|
|
232
|
+
* votingMethod: 'weighted_average'
|
|
233
|
+
* });
|
|
234
|
+
*
|
|
235
|
+
* const result = await judge.evaluate(
|
|
236
|
+
* 'screenshot.png',
|
|
237
|
+
* 'Evaluate accessibility'
|
|
238
|
+
* );
|
|
239
|
+
*
|
|
240
|
+
* console.log(result.score); // Aggregated score
|
|
241
|
+
* console.log(result.agreement.score); // How much models agree
|
|
242
|
+
* ```
|
|
243
|
+
*/
|
|
213
244
|
export class EnsembleJudge {
|
|
245
|
+
/**
|
|
246
|
+
* Create a new Ensemble Judge instance.
|
|
247
|
+
*
|
|
248
|
+
* @param options - Ensemble options (judges, voting method, weights, etc.)
|
|
249
|
+
*/
|
|
214
250
|
constructor(options?: EnsembleJudgeOptions);
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* Evaluate screenshot using multiple judges and aggregate results.
|
|
254
|
+
*
|
|
255
|
+
* @param imagePath - Path to screenshot
|
|
256
|
+
* @param prompt - Evaluation prompt
|
|
257
|
+
* @param context - Optional validation context
|
|
258
|
+
* @returns Promise resolving to EnsembleResult with aggregated score and agreement metrics
|
|
259
|
+
*/
|
|
215
260
|
evaluate(imagePath: string, prompt: string, context?: Record<string, unknown>): Promise<EnsembleResult>;
|
|
216
261
|
}
|
|
217
262
|
|
|
218
263
|
export function createEnsembleJudge(providers?: string[], options?: EnsembleJudgeOptions): EnsembleJudge;
|
|
219
264
|
|
|
220
265
|
// Core Types
|
|
266
|
+
/**
|
|
267
|
+
* Validation context for screenshot validation.
|
|
268
|
+
*
|
|
269
|
+
* Provides additional context to guide the AI evaluation, including test type,
|
|
270
|
+
* viewport information, game state, and optimization options.
|
|
271
|
+
*
|
|
272
|
+
* @example
|
|
273
|
+
* ```typescript
|
|
274
|
+
* const context: ValidationContext = {
|
|
275
|
+
* testType: 'accessibility',
|
|
276
|
+
* viewport: { width: 1920, height: 1080 },
|
|
277
|
+
* autoSelectTier: true,
|
|
278
|
+
* autoSelectProvider: true
|
|
279
|
+
* };
|
|
280
|
+
* ```
|
|
281
|
+
*/
|
|
221
282
|
export interface ValidationContext {
|
|
283
|
+
/** Test type identifier (e.g., 'accessibility', 'payment-screen', 'gameplay') */
|
|
222
284
|
testType?: string;
|
|
285
|
+
/** Viewport dimensions for context-aware evaluation */
|
|
223
286
|
viewport?: { width: number; height: number };
|
|
287
|
+
/** Game state or application state for context */
|
|
224
288
|
gameState?: Record<string, unknown>;
|
|
289
|
+
/** Enable caching (default: true) */
|
|
225
290
|
useCache?: boolean;
|
|
291
|
+
/** Request timeout in milliseconds */
|
|
226
292
|
timeout?: number;
|
|
293
|
+
/** Use explicit rubric for consistent scoring */
|
|
227
294
|
useRubric?: boolean;
|
|
295
|
+
/** Include dimension scores in evaluation */
|
|
228
296
|
includeDimensions?: boolean;
|
|
297
|
+
/** URL of the page being tested */
|
|
229
298
|
url?: string;
|
|
299
|
+
/** Description of the test scenario */
|
|
230
300
|
description?: string;
|
|
301
|
+
/** Current step in multi-step test */
|
|
231
302
|
step?: string;
|
|
303
|
+
/** Custom prompt builder function */
|
|
232
304
|
promptBuilder?: (prompt: string, context: ValidationContext) => string;
|
|
305
|
+
/** Auto-select model tier (fast/balanced/best) based on context */
|
|
306
|
+
autoSelectTier?: boolean;
|
|
307
|
+
/** Auto-select provider (cheapest available) */
|
|
308
|
+
autoSelectProvider?: boolean;
|
|
309
|
+
/** Include cost comparison in results */
|
|
310
|
+
includeCostComparison?: boolean;
|
|
311
|
+
/** Frequency for high-frequency validation (Hz) */
|
|
312
|
+
frequency?: number;
|
|
313
|
+
/** Cost sensitivity flag for optimization */
|
|
314
|
+
costSensitive?: boolean;
|
|
315
|
+
/** Criticality level (low/medium/high/critical) */
|
|
316
|
+
criticality?: 'low' | 'medium' | 'high' | 'critical';
|
|
317
|
+
/** Model tier to use (fast/balanced/best) */
|
|
318
|
+
modelTier?: 'fast' | 'balanced' | 'best';
|
|
319
|
+
/** Enable temporal decision making (for high-frequency validation) */
|
|
320
|
+
useTemporalDecision?: boolean;
|
|
321
|
+
/** Temporal notes for decision context */
|
|
322
|
+
temporalNotes?: TemporalNote[];
|
|
323
|
+
/** Current state for temporal decision */
|
|
324
|
+
currentState?: Record<string, unknown>;
|
|
325
|
+
/** Previous state for temporal decision */
|
|
326
|
+
previousState?: Record<string, unknown>;
|
|
327
|
+
/** Previous result for temporal decision */
|
|
328
|
+
previousResult?: ValidationResult;
|
|
329
|
+
/** Temporal decision manager options */
|
|
330
|
+
temporalDecisionOptions?: Record<string, unknown>;
|
|
331
|
+
/** Per-call visual anchors (appended to config-level anchors) */
|
|
332
|
+
anchors?: VisualAnchors | null;
|
|
233
333
|
}
|
|
234
334
|
|
|
235
335
|
export interface EstimatedCost {
|
|
@@ -241,34 +341,168 @@ export interface EstimatedCost {
|
|
|
241
341
|
currency: string;
|
|
242
342
|
}
|
|
243
343
|
|
|
344
|
+
/** A structured issue with metadata (importance, evidence, suggestion). */
|
|
345
|
+
export interface RichIssue {
|
|
346
|
+
/** Human-readable issue description */
|
|
347
|
+
description: string;
|
|
348
|
+
/** Importance level */
|
|
349
|
+
importance?: 'low' | 'medium' | 'high' | 'critical';
|
|
350
|
+
/** Annoyance level */
|
|
351
|
+
annoyance?: 'low' | 'medium' | 'high';
|
|
352
|
+
/** Impact category */
|
|
353
|
+
impact?: string;
|
|
354
|
+
/** Evidence observed in the screenshot */
|
|
355
|
+
evidence?: string;
|
|
356
|
+
/** Suggested fix */
|
|
357
|
+
suggestion?: string;
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
/** A structured recommendation with priority and expected impact. */
|
|
361
|
+
export interface Recommendation {
|
|
362
|
+
/** Priority level */
|
|
363
|
+
priority?: 'low' | 'medium' | 'high';
|
|
364
|
+
/** What to change */
|
|
365
|
+
suggestion: string;
|
|
366
|
+
/** Expected improvement from the change */
|
|
367
|
+
expectedImpact?: string;
|
|
368
|
+
}
|
|
369
|
+
|
|
244
370
|
export interface SemanticInfo {
|
|
245
371
|
score: number | null;
|
|
246
|
-
issues:
|
|
372
|
+
issues: RichIssue[];
|
|
247
373
|
assessment: string | null;
|
|
248
|
-
reasoning: string;
|
|
374
|
+
reasoning: string | null;
|
|
375
|
+
strengths?: string[];
|
|
376
|
+
recommendations?: Recommendation[];
|
|
377
|
+
evidence?: string | Record<string, unknown> | null;
|
|
378
|
+
dimensionScores?: Record<string, number> | null;
|
|
249
379
|
brutalistViolations?: string[];
|
|
250
380
|
zeroToleranceViolations?: string[];
|
|
251
381
|
}
|
|
252
382
|
|
|
383
|
+
/**
|
|
384
|
+
* Result of screenshot validation.
|
|
385
|
+
*
|
|
386
|
+
* Contains the AI's evaluation of the screenshot, including score, issues,
|
|
387
|
+
* reasoning, and metadata about the validation process.
|
|
388
|
+
*
|
|
389
|
+
* @example
|
|
390
|
+
* ```typescript
|
|
391
|
+
* const result: ValidationResult = {
|
|
392
|
+
* enabled: true,
|
|
393
|
+
* provider: 'gemini',
|
|
394
|
+
* score: 8.5,
|
|
395
|
+
* issues: ['Low contrast on submit button'],
|
|
396
|
+
* assessment: 'Good',
|
|
397
|
+
* reasoning: 'The form is mostly accessible...',
|
|
398
|
+
* estimatedCost: { totalCost: '0.000123', currency: 'USD' },
|
|
399
|
+
* responseTime: 1234,
|
|
400
|
+
* cached: false
|
|
401
|
+
* };
|
|
402
|
+
* ```
|
|
403
|
+
*/
|
|
253
404
|
export interface ValidationResult {
|
|
405
|
+
/** Whether validation was enabled (false if API key missing) */
|
|
254
406
|
enabled: boolean;
|
|
407
|
+
/** LLM provider used (gemini, openai, claude, groq) */
|
|
255
408
|
provider: string;
|
|
409
|
+
/** Quality score (0-10, null if validation failed) */
|
|
256
410
|
score: number | null;
|
|
411
|
+
/** List of issues found (flat strings, kept for backward compatibility) */
|
|
257
412
|
issues: string[];
|
|
413
|
+
/** Structured issues with importance, evidence, and suggestions */
|
|
414
|
+
richIssues?: RichIssue[];
|
|
415
|
+
/** Overall assessment (e.g., 'Good', 'Needs Improvement') */
|
|
258
416
|
assessment: string | null;
|
|
417
|
+
/** Detailed reasoning for the score */
|
|
259
418
|
reasoning: string;
|
|
419
|
+
/** Actionable recommendations with priority and expected impact */
|
|
420
|
+
recommendations?: Recommendation[];
|
|
421
|
+
/** What the UI does well */
|
|
422
|
+
strengths?: string[];
|
|
423
|
+
/** Per-dimension scores (e.g., game_authenticity: 9, typography: 7) */
|
|
424
|
+
dimensionScores?: Record<string, number> | null;
|
|
425
|
+
/** Estimated API cost breakdown */
|
|
260
426
|
estimatedCost?: EstimatedCost | null;
|
|
427
|
+
/** Response time in milliseconds */
|
|
261
428
|
responseTime: number;
|
|
429
|
+
/** Whether result was served from cache */
|
|
262
430
|
cached?: boolean;
|
|
431
|
+
/** Raw judgment text from LLM */
|
|
263
432
|
judgment?: string;
|
|
433
|
+
/** Raw API response */
|
|
264
434
|
raw?: unknown;
|
|
435
|
+
/** Extracted semantic information */
|
|
265
436
|
semantic?: SemanticInfo;
|
|
437
|
+
/** Error message if validation failed */
|
|
266
438
|
error?: string;
|
|
439
|
+
/** Status message */
|
|
267
440
|
message?: string;
|
|
441
|
+
/** Provider pricing information */
|
|
268
442
|
pricing?: { input: number; output: number };
|
|
443
|
+
/** Timestamp of validation */
|
|
269
444
|
timestamp?: string;
|
|
445
|
+
/** Test name if provided */
|
|
270
446
|
testName?: string;
|
|
447
|
+
/** Viewport dimensions if provided */
|
|
271
448
|
viewport?: { width: number; height: number } | null;
|
|
449
|
+
/** Cost comparison information (if includeCostComparison enabled) */
|
|
450
|
+
costComparison?: {
|
|
451
|
+
current: { tier: string; provider: string; cost: number };
|
|
452
|
+
tiers: Record<string, number>;
|
|
453
|
+
savings: Record<string, { absolute: number; percent: number; cost: number }>;
|
|
454
|
+
recommendation: { tier: string; cost: number; savings: number; savingsPercent: number; reason: string };
|
|
455
|
+
};
|
|
456
|
+
/** Whether temporal decision skipped this call */
|
|
457
|
+
skipped?: boolean;
|
|
458
|
+
/** Reason for skipping (if skipped) */
|
|
459
|
+
skipReason?: string;
|
|
460
|
+
/** Urgency level (if temporal decision used) */
|
|
461
|
+
urgency?: 'low' | 'medium' | 'high';
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
/**
|
|
465
|
+
* A single visual anchor: either a plain text string or an object
|
|
466
|
+
* with optional dimension scoping and/or an image reference.
|
|
467
|
+
*
|
|
468
|
+
* Plain string: `"Card images large enough to see art"`
|
|
469
|
+
* With dimension: `{ text: "Card images large", dimension: "card_presentation" }`
|
|
470
|
+
* Image ref: `{ image: "/path/to/good.png", label: "Well-themed Magic layout" }`
|
|
471
|
+
* Image + dimension: `{ image: "/path/to/good.png", label: "...", dimension: "game_authenticity" }`
|
|
472
|
+
*
|
|
473
|
+
* Images accept a file path or a data URI (`data:image/png;base64,...`).
|
|
474
|
+
*/
|
|
475
|
+
export type AnchorEntry = string | {
|
|
476
|
+
/** Text description of the anchor signal */
|
|
477
|
+
text?: string;
|
|
478
|
+
/** File path or data URI of a reference screenshot */
|
|
479
|
+
image?: string;
|
|
480
|
+
/** Short label for the image (shown in prompt) */
|
|
481
|
+
label?: string;
|
|
482
|
+
/** Rubric dimension this anchor relates to (e.g., "game_authenticity") */
|
|
483
|
+
dimension?: string;
|
|
484
|
+
};
|
|
485
|
+
|
|
486
|
+
/**
|
|
487
|
+
* Domain-level visual anchors for VLM evaluation grounding.
|
|
488
|
+
*
|
|
489
|
+
* Text anchors describe what to look for / flag in words.
|
|
490
|
+
* Image anchors provide reference screenshots as few-shot visual examples
|
|
491
|
+
* so the VLM can calibrate against concrete good/bad instances.
|
|
492
|
+
*
|
|
493
|
+
* Anchors can optionally be scoped to rubric dimensions via the
|
|
494
|
+
* `dimension` field on AnchorEntry objects.
|
|
495
|
+
*
|
|
496
|
+
* Set once in config for the project; per-call anchors in
|
|
497
|
+
* ValidationContext append to (not replace) config-level anchors.
|
|
498
|
+
*/
|
|
499
|
+
export interface VisualAnchors {
|
|
500
|
+
/** Brief domain description injected as context (e.g., "Card game search UI for TCG players") */
|
|
501
|
+
domain?: string;
|
|
502
|
+
/** Positive signals the VLM should look for (text and/or image entries) */
|
|
503
|
+
positive?: AnchorEntry[];
|
|
504
|
+
/** Negative signals the VLM should flag (text and/or image entries) */
|
|
505
|
+
negative?: AnchorEntry[];
|
|
272
506
|
}
|
|
273
507
|
|
|
274
508
|
export interface ConfigOptions {
|
|
@@ -280,6 +514,8 @@ export interface ConfigOptions {
|
|
|
280
514
|
maxConcurrency?: number;
|
|
281
515
|
timeout?: number;
|
|
282
516
|
verbose?: boolean;
|
|
517
|
+
/** Domain-level visual anchors included in every evaluation prompt */
|
|
518
|
+
anchors?: VisualAnchors | null;
|
|
283
519
|
}
|
|
284
520
|
|
|
285
521
|
export interface Config {
|
|
@@ -294,6 +530,8 @@ export interface Config {
|
|
|
294
530
|
priority: number;
|
|
295
531
|
};
|
|
296
532
|
enabled: boolean;
|
|
533
|
+
/** Normalized visual anchors (null when none configured) */
|
|
534
|
+
anchors: VisualAnchors | null;
|
|
297
535
|
cache: {
|
|
298
536
|
enabled: boolean;
|
|
299
537
|
dir: string | null;
|
|
@@ -307,28 +545,183 @@ export interface Config {
|
|
|
307
545
|
};
|
|
308
546
|
}
|
|
309
547
|
|
|
310
|
-
|
|
548
|
+
/**
|
|
549
|
+
* VLLM Judge Class
|
|
550
|
+
*
|
|
551
|
+
* Core screenshot validation engine using Vision Language Models.
|
|
552
|
+
* Supports multiple providers (Gemini, OpenAI, Claude, Groq) with automatic
|
|
553
|
+
* selection, caching, and cost optimization.
|
|
554
|
+
*
|
|
555
|
+
* **Use when:** You need fine-grained control over validation or custom judge implementations.
|
|
556
|
+
* **Otherwise:** Use `validateScreenshot()` function for simpler API.
|
|
557
|
+
*
|
|
558
|
+
* @example
|
|
559
|
+
* ```typescript
|
|
560
|
+
* // Create custom judge instance
|
|
561
|
+
* const judge = new VLLMJudge({
|
|
562
|
+
* provider: 'gemini',
|
|
563
|
+
* apiKey: process.env.GEMINI_API_KEY,
|
|
564
|
+
* cacheEnabled: true
|
|
565
|
+
* });
|
|
566
|
+
*
|
|
567
|
+
* const result = await judge.judgeScreenshot(
|
|
568
|
+
* 'screenshot.png',
|
|
569
|
+
* 'Evaluate this page'
|
|
570
|
+
* );
|
|
571
|
+
* ```
|
|
572
|
+
*/
|
|
311
573
|
export class VLLMJudge {
|
|
574
|
+
/**
|
|
575
|
+
* Create a new VLLM Judge instance.
|
|
576
|
+
*
|
|
577
|
+
* @param options - Configuration options (provider, API key, cache, etc.)
|
|
578
|
+
*/
|
|
312
579
|
constructor(options?: ConfigOptions);
|
|
580
|
+
|
|
581
|
+
/** Current provider name (gemini, openai, claude, groq) */
|
|
313
582
|
provider: string;
|
|
583
|
+
/** API key for current provider */
|
|
314
584
|
apiKey: string | null;
|
|
585
|
+
/** Provider configuration (model, pricing, etc.) */
|
|
315
586
|
providerConfig: Config['providerConfig'];
|
|
587
|
+
/** Whether validation is enabled (false if API key missing) */
|
|
316
588
|
enabled: boolean;
|
|
317
589
|
|
|
590
|
+
/**
|
|
591
|
+
* Convert image file to base64 string for API.
|
|
592
|
+
*
|
|
593
|
+
* @param imagePath - Path to image file
|
|
594
|
+
* @returns Base64-encoded image string
|
|
595
|
+
* @throws {FileError} If file not found or invalid format
|
|
596
|
+
*/
|
|
318
597
|
imageToBase64(imagePath: string): string;
|
|
598
|
+
|
|
599
|
+
/**
|
|
600
|
+
* Build evaluation prompt with context.
|
|
601
|
+
*
|
|
602
|
+
* @param prompt - Base evaluation prompt
|
|
603
|
+
* @param context - Validation context
|
|
604
|
+
* @returns Enhanced prompt with context
|
|
605
|
+
*/
|
|
319
606
|
buildPrompt(prompt: string, context: ValidationContext): string;
|
|
607
|
+
|
|
608
|
+
/**
|
|
609
|
+
* Extract semantic information from judgment text.
|
|
610
|
+
*
|
|
611
|
+
* @param judgment - Judgment text or object
|
|
612
|
+
* @returns Structured semantic information
|
|
613
|
+
*/
|
|
320
614
|
extractSemanticInfo(judgment: string | object): SemanticInfo;
|
|
615
|
+
|
|
616
|
+
/**
|
|
617
|
+
* Estimate API cost for validation.
|
|
618
|
+
*
|
|
619
|
+
* @param data - API request/response data
|
|
620
|
+
* @param provider - Provider name
|
|
621
|
+
* @returns Estimated cost breakdown or null
|
|
622
|
+
*/
|
|
321
623
|
estimateCost(data: unknown, provider: string): EstimatedCost | null;
|
|
322
|
-
|
|
624
|
+
|
|
625
|
+
/**
|
|
626
|
+
* Judge a screenshot using VLLM.
|
|
627
|
+
*
|
|
628
|
+
* @param imagePath - Path to a screenshot, or an array of paths for comparison
|
|
629
|
+
* @param prompt - Evaluation prompt
|
|
630
|
+
* @param context - Optional validation context
|
|
631
|
+
* @returns Promise resolving to ValidationResult
|
|
632
|
+
*/
|
|
633
|
+
judgeScreenshot(imagePath: string | string[], prompt: string, context?: ValidationContext): Promise<ValidationResult>;
|
|
323
634
|
}
|
|
324
635
|
|
|
325
636
|
// Core Functions
|
|
637
|
+
/**
|
|
638
|
+
* Validate a screenshot using Vision Language Models (VLLM).
|
|
639
|
+
*
|
|
640
|
+
* This is the primary API function. It takes a screenshot and evaluation prompt,
|
|
641
|
+
* sends it to an AI model (Gemini, OpenAI, Claude, or Groq), and returns structured
|
|
642
|
+
* validation results with score, issues, and reasoning.
|
|
643
|
+
*
|
|
644
|
+
* **Key Features:**
|
|
645
|
+
* - Automatic provider selection (cheapest available)
|
|
646
|
+
* - Automatic tier selection (fast/balanced/best)
|
|
647
|
+
* - Built-in caching (7-day TTL)
|
|
648
|
+
* - Cost optimization
|
|
649
|
+
* - Temporal decision making (for high-frequency validation)
|
|
650
|
+
*
|
|
651
|
+
* @param imagePath - Path to screenshot file (PNG, JPEG, GIF, WebP) or array of paths for comparison
|
|
652
|
+
* @param prompt - Evaluation prompt (e.g., "Is this accessible?", "Check if payment form works")
|
|
653
|
+
* @param context - Optional validation context (testType, viewport, optimization options)
|
|
654
|
+
* @returns Promise resolving to ValidationResult with score, issues, reasoning, and metadata
|
|
655
|
+
*
|
|
656
|
+
* @example
|
|
657
|
+
* ```typescript
|
|
658
|
+
* // Basic usage
|
|
659
|
+
* const result = await validateScreenshot(
|
|
660
|
+
* 'screenshot.png',
|
|
661
|
+
* 'Check if this payment form is accessible'
|
|
662
|
+
* );
|
|
663
|
+
* console.log(result.score); // 8.5 (0-10 scale)
|
|
664
|
+
* console.log(result.issues); // ['Low contrast on button', 'Missing label']
|
|
665
|
+
* console.log(result.reasoning); // "The form is mostly accessible..."
|
|
666
|
+
* ```
|
|
667
|
+
*
|
|
668
|
+
* @example
|
|
669
|
+
* ```typescript
|
|
670
|
+
* // With cost optimization
|
|
671
|
+
* const result = await validateScreenshot(
|
|
672
|
+
* 'screenshot.png',
|
|
673
|
+
* 'Evaluate accessibility',
|
|
674
|
+
* {
|
|
675
|
+
* autoSelectTier: true,
|
|
676
|
+
* autoSelectProvider: true,
|
|
677
|
+
* includeCostComparison: true
|
|
678
|
+
* }
|
|
679
|
+
* );
|
|
680
|
+
* console.log(result.costComparison?.savings.fast?.percent); // 45% savings
|
|
681
|
+
* ```
|
|
682
|
+
*
|
|
683
|
+
* @example
|
|
684
|
+
* ```typescript
|
|
685
|
+
* // High-frequency validation (60Hz)
|
|
686
|
+
* const result = await validateScreenshot(
|
|
687
|
+
* 'frame.png',
|
|
688
|
+
* 'Is the game playable?',
|
|
689
|
+
* {
|
|
690
|
+
* frequency: 60,
|
|
691
|
+
* autoSelectTier: true,
|
|
692
|
+
* useTemporalDecision: true
|
|
693
|
+
* }
|
|
694
|
+
* );
|
|
695
|
+
* ```
|
|
696
|
+
*
|
|
697
|
+
* @throws {FileError} If screenshot file not found or invalid format
|
|
698
|
+
* @throws {ValidationError} If validation fails
|
|
699
|
+
* @throws {ProviderError} If API provider error occurs
|
|
700
|
+
* @throws {TimeoutError} If request times out
|
|
701
|
+
*/
|
|
326
702
|
export function validateScreenshot(
|
|
327
|
-
imagePath: string,
|
|
703
|
+
imagePath: string | string[],
|
|
328
704
|
prompt: string,
|
|
329
705
|
context?: ValidationContext
|
|
330
706
|
): Promise<ValidationResult>;
|
|
331
707
|
|
|
708
|
+
/**
|
|
709
|
+
* Extract semantic information from VLLM judgment text.
|
|
710
|
+
*
|
|
711
|
+
* Parses AI judgment responses into structured data (score, issues, reasoning).
|
|
712
|
+
* Useful for custom implementations that need to parse judgment text.
|
|
713
|
+
*
|
|
714
|
+
* @param judgment - Judgment text or object from VLLM
|
|
715
|
+
* @returns Structured semantic information with score, issues, assessment, reasoning
|
|
716
|
+
*
|
|
717
|
+
* @example
|
|
718
|
+
* ```typescript
|
|
719
|
+
* const judgment = "Score: 8.5. Issues: Low contrast. Reasoning: The form is mostly accessible...";
|
|
720
|
+
* const info = extractSemanticInfo(judgment);
|
|
721
|
+
* console.log(info.score); // 8.5
|
|
722
|
+
* console.log(info.issues); // ['Low contrast']
|
|
723
|
+
* ```
|
|
724
|
+
*/
|
|
332
725
|
export function extractSemanticInfo(judgment: string | object): SemanticInfo;
|
|
333
726
|
|
|
334
727
|
// Multi-Modal Types
|
|
@@ -405,11 +798,33 @@ export function multiModalValidation(
|
|
|
405
798
|
}>;
|
|
406
799
|
|
|
407
800
|
// Temporal Types
|
|
801
|
+
/**
|
|
802
|
+
* Temporal note for tracking state over time.
|
|
803
|
+
*
|
|
804
|
+
* Used in high-frequency validation (10-60Hz) to track observations
|
|
805
|
+
* and enable temporal decision making (reduces LLM calls by 98.5%).
|
|
806
|
+
*
|
|
807
|
+
* @example
|
|
808
|
+
* ```typescript
|
|
809
|
+
* const note: TemporalNote = {
|
|
810
|
+
* timestamp: Date.now(),
|
|
811
|
+
* elapsed: 100,
|
|
812
|
+
* score: 8.5,
|
|
813
|
+
* observation: 'Button clicked',
|
|
814
|
+
* step: 'checkout'
|
|
815
|
+
* };
|
|
816
|
+
* ```
|
|
817
|
+
*/
|
|
408
818
|
export interface TemporalNote {
|
|
819
|
+
/** Timestamp in milliseconds */
|
|
409
820
|
timestamp?: number;
|
|
821
|
+
/** Elapsed time since start in milliseconds */
|
|
410
822
|
elapsed?: number;
|
|
823
|
+
/** Quality score (0-10) */
|
|
411
824
|
score?: number;
|
|
825
|
+
/** Observation description */
|
|
412
826
|
observation?: string;
|
|
827
|
+
/** Step identifier */
|
|
413
828
|
step?: string;
|
|
414
829
|
}
|
|
415
830
|
|
|
@@ -437,6 +852,36 @@ export interface AggregatedTemporalNotes {
|
|
|
437
852
|
}
|
|
438
853
|
|
|
439
854
|
// Temporal Functions
|
|
855
|
+
/**
|
|
856
|
+
* Aggregate temporal notes into time windows with weighted scores.
|
|
857
|
+
*
|
|
858
|
+
* Used for high-frequency validation to reduce LLM calls by aggregating
|
|
859
|
+
* observations over time windows. Implements exponential decay weighting
|
|
860
|
+
* (recent notes weighted more heavily).
|
|
861
|
+
*
|
|
862
|
+
* **Research:** Inspired by arXiv:2505.17663 (DynToM) and arXiv:2507.15851
|
|
863
|
+
* (Human Temporal Cognition), adapted with exponential decay for practical use.
|
|
864
|
+
*
|
|
865
|
+
* @param notes - Array of temporal notes to aggregate
|
|
866
|
+
* @param options - Aggregation options
|
|
867
|
+
* @param options.windowSize - Time window size in milliseconds (default: 1000)
|
|
868
|
+
* @param options.decayFactor - Exponential decay factor (default: 0.9)
|
|
869
|
+
* @param options.coherenceThreshold - Coherence threshold for filtering (default: 0.5)
|
|
870
|
+
* @returns Aggregated notes with windows, summary, and coherence score
|
|
871
|
+
*
|
|
872
|
+
* @example
|
|
873
|
+
* ```typescript
|
|
874
|
+
* const notes: TemporalNote[] = [
|
|
875
|
+
* { timestamp: 0, score: 8, observation: 'Initial state' },
|
|
876
|
+
* { timestamp: 100, score: 8.5, observation: 'Button clicked' },
|
|
877
|
+
* { timestamp: 200, score: 9, observation: 'Form submitted' }
|
|
878
|
+
* ];
|
|
879
|
+
*
|
|
880
|
+
* const aggregated = aggregateTemporalNotes(notes);
|
|
881
|
+
* console.log(aggregated.coherence); // 0.92 (high coherence)
|
|
882
|
+
* console.log(aggregated.windows[0].avgScore); // 8.5
|
|
883
|
+
* ```
|
|
884
|
+
*/
|
|
440
885
|
export function aggregateTemporalNotes(
|
|
441
886
|
notes: TemporalNote[],
|
|
442
887
|
options?: {
|
|
@@ -450,6 +895,223 @@ export function formatNotesForPrompt(aggregated: AggregatedTemporalNotes): strin
|
|
|
450
895
|
|
|
451
896
|
export function calculateCoherence(windows: TemporalWindow[]): number;
|
|
452
897
|
|
|
898
|
+
/**
|
|
899
|
+
* Temporal Decision Manager
|
|
900
|
+
*
|
|
901
|
+
* Decides when to call LLM vs. reuse previous result for high-frequency validation.
|
|
902
|
+
* Reduces LLM calls by 98.5% while maintaining accuracy through temporal coherence.
|
|
903
|
+
*
|
|
904
|
+
* **Research:** Based on arXiv:2406.12125 - "Efficient Sequential Decision Making with Large Language Models"
|
|
905
|
+
*
|
|
906
|
+
* **Core Insight:** Don't prompt on every state change, prompt when decision is needed.
|
|
907
|
+
*
|
|
908
|
+
* **Note:** Implementation is obfuscated to protect proprietary algorithms, but API is fully documented.
|
|
909
|
+
*
|
|
910
|
+
* @example
|
|
911
|
+
* ```typescript
|
|
912
|
+
* const manager = new TemporalDecisionManager({
|
|
913
|
+
* minNotesForPrompt: 3,
|
|
914
|
+
* coherenceThreshold: 0.5,
|
|
915
|
+
* urgencyThreshold: 0.3
|
|
916
|
+
* });
|
|
917
|
+
*
|
|
918
|
+
* const decision = await manager.shouldPrompt(
|
|
919
|
+
* currentState,
|
|
920
|
+
* previousState,
|
|
921
|
+
* temporalNotes,
|
|
922
|
+
* context
|
|
923
|
+
* );
|
|
924
|
+
*
|
|
925
|
+
* if (decision.shouldPrompt) {
|
|
926
|
+
* // Call LLM
|
|
927
|
+
* } else {
|
|
928
|
+
* // Reuse previous result
|
|
929
|
+
* }
|
|
930
|
+
* ```
|
|
931
|
+
*/
|
|
932
|
+
export class TemporalDecisionManager {
|
|
933
|
+
/**
|
|
934
|
+
* Create a new Temporal Decision Manager.
|
|
935
|
+
*
|
|
936
|
+
* @param options - Decision manager options
|
|
937
|
+
* @param options.minNotesForPrompt - Minimum notes before prompting (default: 3)
|
|
938
|
+
* @param options.coherenceThreshold - Coherence threshold for prompting (default: 0.5)
|
|
939
|
+
* @param options.urgencyThreshold - Urgency threshold for prompting (default: 0.3)
|
|
940
|
+
* @param options.maxWaitTime - Maximum wait time before forcing prompt (default: 10000ms)
|
|
941
|
+
* @param options.stateChangeThreshold - State change threshold for prompting (default: 0.2)
|
|
942
|
+
* @param options.warmStartSteps - Use LLM for first N steps (default: 10)
|
|
943
|
+
* @param options.adaptiveSampling - Enable adaptive sampling (default: true)
|
|
944
|
+
*/
|
|
945
|
+
constructor(options?: {
|
|
946
|
+
minNotesForPrompt?: number;
|
|
947
|
+
coherenceThreshold?: number;
|
|
948
|
+
urgencyThreshold?: number;
|
|
949
|
+
maxWaitTime?: number;
|
|
950
|
+
stateChangeThreshold?: number;
|
|
951
|
+
warmStartSteps?: number;
|
|
952
|
+
adaptiveSampling?: boolean;
|
|
953
|
+
});
|
|
954
|
+
|
|
955
|
+
/**
|
|
956
|
+
* Decide if we should prompt now or wait for more context.
|
|
957
|
+
*
|
|
958
|
+
* @param currentState - Current state object
|
|
959
|
+
* @param previousState - Previous state object (if any)
|
|
960
|
+
* @param temporalNotes - Array of temporal notes
|
|
961
|
+
* @param context - Additional context
|
|
962
|
+
* @returns Decision object with shouldPrompt, reason, and urgency
|
|
963
|
+
*/
|
|
964
|
+
shouldPrompt(
|
|
965
|
+
currentState: Record<string, unknown>,
|
|
966
|
+
previousState: Record<string, unknown> | null,
|
|
967
|
+
temporalNotes: TemporalNote[],
|
|
968
|
+
context?: Record<string, unknown>
|
|
969
|
+
): Promise<{
|
|
970
|
+
shouldPrompt: boolean;
|
|
971
|
+
reason: string;
|
|
972
|
+
urgency: 'low' | 'medium' | 'high';
|
|
973
|
+
}>;
|
|
974
|
+
|
|
975
|
+
/**
|
|
976
|
+
* Calculate state change magnitude.
|
|
977
|
+
*
|
|
978
|
+
* @param currentState - Current state
|
|
979
|
+
* @param previousState - Previous state
|
|
980
|
+
* @returns State change score (0-1)
|
|
981
|
+
*/
|
|
982
|
+
calculateStateChange(
|
|
983
|
+
currentState: Record<string, unknown>,
|
|
984
|
+
previousState: Record<string, unknown> | null
|
|
985
|
+
): number;
|
|
986
|
+
|
|
987
|
+
/**
|
|
988
|
+
* Check if current state is a decision point.
|
|
989
|
+
*
|
|
990
|
+
* @param currentState - Current state
|
|
991
|
+
* @param context - Additional context
|
|
992
|
+
* @returns True if decision point
|
|
993
|
+
*/
|
|
994
|
+
isDecisionPoint(
|
|
995
|
+
currentState: Record<string, unknown>,
|
|
996
|
+
context?: Record<string, unknown>
|
|
997
|
+
): boolean;
|
|
998
|
+
|
|
999
|
+
/**
|
|
1000
|
+
* Check if there's a recent user action.
|
|
1001
|
+
*
|
|
1002
|
+
* @param temporalNotes - Array of temporal notes
|
|
1003
|
+
* @param context - Additional context
|
|
1004
|
+
* @returns True if recent user action detected
|
|
1005
|
+
*/
|
|
1006
|
+
hasRecentUserAction(
|
|
1007
|
+
temporalNotes: TemporalNote[],
|
|
1008
|
+
context?: Record<string, unknown>
|
|
1009
|
+
): boolean;
|
|
1010
|
+
}
|
|
1011
|
+
|
|
1012
|
+
/**
|
|
1013
|
+
* Create a temporal decision manager with default options.
|
|
1014
|
+
*
|
|
1015
|
+
* @param options - Decision manager options
|
|
1016
|
+
* @returns New TemporalDecisionManager instance
|
|
1017
|
+
*/
|
|
1018
|
+
export function createTemporalDecisionManager(options?: {
|
|
1019
|
+
minNotesForPrompt?: number;
|
|
1020
|
+
coherenceThreshold?: number;
|
|
1021
|
+
urgencyThreshold?: number;
|
|
1022
|
+
maxWaitTime?: number;
|
|
1023
|
+
stateChangeThreshold?: number;
|
|
1024
|
+
warmStartSteps?: number;
|
|
1025
|
+
adaptiveSampling?: boolean;
|
|
1026
|
+
}): TemporalDecisionManager;
|
|
1027
|
+
|
|
1028
|
+
/**
|
|
1029
|
+
* Temporal Preprocessing Manager
|
|
1030
|
+
*
|
|
1031
|
+
* Optimizes temporal note processing for high-frequency validation (10-60Hz).
|
|
1032
|
+
* Implements activity-based preprocessing patterns to reduce computational overhead.
|
|
1033
|
+
*
|
|
1034
|
+
* **Note:** The implementation is obfuscated to protect proprietary algorithms, but the API is fully documented.
|
|
1035
|
+
*
|
|
1036
|
+
* @example
|
|
1037
|
+
* ```typescript
|
|
1038
|
+
* const manager = new TemporalPreprocessingManager({
|
|
1039
|
+
* activityThreshold: 0.5,
|
|
1040
|
+
* highFrequencyMode: true
|
|
1041
|
+
* });
|
|
1042
|
+
*
|
|
1043
|
+
* const processed = await manager.preprocess(temporalNotes, context);
|
|
1044
|
+
* ```
|
|
1045
|
+
*/
|
|
1046
|
+
export class TemporalPreprocessingManager {
|
|
1047
|
+
/**
|
|
1048
|
+
* Create a new Temporal Preprocessing Manager.
|
|
1049
|
+
*
|
|
1050
|
+
* @param options - Preprocessing options
|
|
1051
|
+
*/
|
|
1052
|
+
constructor(options?: Record<string, unknown>);
|
|
1053
|
+
|
|
1054
|
+
/**
|
|
1055
|
+
* Preprocess temporal notes for efficient handling.
|
|
1056
|
+
*
|
|
1057
|
+
* @param notes - Array of temporal notes
|
|
1058
|
+
* @param context - Additional context
|
|
1059
|
+
* @returns Processed notes
|
|
1060
|
+
*/
|
|
1061
|
+
preprocess(
|
|
1062
|
+
notes: TemporalNote[],
|
|
1063
|
+
context?: Record<string, unknown>
|
|
1064
|
+
): Promise<TemporalNote[]>;
|
|
1065
|
+
}
|
|
1066
|
+
|
|
1067
|
+
/**
|
|
1068
|
+
* Adaptive Temporal Processor
|
|
1069
|
+
*
|
|
1070
|
+
* Adaptively processes temporal notes based on activity patterns.
|
|
1071
|
+
*
|
|
1072
|
+
* @example
|
|
1073
|
+
* ```typescript
|
|
1074
|
+
* const processor = new AdaptiveTemporalProcessor();
|
|
1075
|
+
* const processed = await processor.process(notes, context);
|
|
1076
|
+
* ```
|
|
1077
|
+
*/
|
|
1078
|
+
export class AdaptiveTemporalProcessor {
|
|
1079
|
+
/**
|
|
1080
|
+
* Create a new Adaptive Temporal Processor.
|
|
1081
|
+
*
|
|
1082
|
+
* @param options - Processor options
|
|
1083
|
+
*/
|
|
1084
|
+
constructor(options?: Record<string, unknown>);
|
|
1085
|
+
|
|
1086
|
+
/**
|
|
1087
|
+
* Process temporal notes adaptively.
|
|
1088
|
+
*
|
|
1089
|
+
* @param notes - Array of temporal notes
|
|
1090
|
+
* @param context - Additional context
|
|
1091
|
+
* @returns Processed notes
|
|
1092
|
+
*/
|
|
1093
|
+
process(
|
|
1094
|
+
notes: TemporalNote[],
|
|
1095
|
+
context?: Record<string, unknown>
|
|
1096
|
+
): Promise<TemporalNote[]>;
|
|
1097
|
+
}
|
|
1098
|
+
|
|
1099
|
+
/**
|
|
1100
|
+
* Create a temporal preprocessing manager. `options` may be omitted, in which case default options are used.
|
|
1101
|
+
*
|
|
1102
|
+
* @param options - Preprocessing options
|
|
1103
|
+
* @returns New TemporalPreprocessingManager instance
|
|
1104
|
+
*/
|
|
1105
|
+
export function createTemporalPreprocessingManager(options?: Record<string, unknown>): TemporalPreprocessingManager;
|
|
1106
|
+
|
|
1107
|
+
/**
|
|
1108
|
+
* Create an adaptive temporal processor. `options` may be omitted, in which case default options are used.
|
|
1109
|
+
*
|
|
1110
|
+
* @param options - Processor options
|
|
1111
|
+
* @returns New AdaptiveTemporalProcessor instance
|
|
1112
|
+
*/
|
|
1113
|
+
export function createAdaptiveTemporalProcessor(options?: Record<string, unknown>): AdaptiveTemporalProcessor;
|
|
1114
|
+
|
|
453
1115
|
// Cache Types
|
|
454
1116
|
export interface CacheStats {
|
|
455
1117
|
hits: number;
|
|
@@ -459,16 +1121,77 @@ export interface CacheStats {
|
|
|
459
1121
|
}
|
|
460
1122
|
|
|
461
1123
|
// Cache Functions
|
|
1124
|
+
/**
|
|
1125
|
+
* Initialize cache system.
|
|
1126
|
+
*
|
|
1127
|
+
* Sets up file-based caching with a 7-day TTL. Cache persists across
|
|
1128
|
+
* process restarts and reduces API costs by serving cached results.
|
|
1129
|
+
*
|
|
1130
|
+
* @param cacheDir - Cache directory path (default: `.cache/ai-visual-test`)
|
|
1131
|
+
*
|
|
1132
|
+
* @example
|
|
1133
|
+
* ```typescript
|
|
1134
|
+
* initCache('/tmp/my-cache');
|
|
1135
|
+
* const result = await validateScreenshot('screenshot.png', 'Evaluate');
|
|
1136
|
+
* // Subsequent calls with same screenshot/prompt use cache
|
|
1137
|
+
* ```
|
|
1138
|
+
*/
|
|
462
1139
|
export function initCache(cacheDir?: string): void;
|
|
1140
|
+
|
|
1141
|
+
/**
|
|
1142
|
+
* Generate cache key for validation request.
|
|
1143
|
+
*
|
|
1144
|
+
* Creates a SHA-256 hash of the image path, prompt, and context for cache lookup.
|
|
1145
|
+
*
|
|
1146
|
+
* @param imagePath - Screenshot path
|
|
1147
|
+
* @param prompt - Evaluation prompt
|
|
1148
|
+
* @param context - Validation context
|
|
1149
|
+
* @returns Cache key string
|
|
1150
|
+
*/
|
|
463
1151
|
export function generateCacheKey(imagePath: string, prompt: string, context?: ValidationContext): string;
|
|
1152
|
+
|
|
1153
|
+
/**
|
|
1154
|
+
* Get cached validation result.
|
|
1155
|
+
*
|
|
1156
|
+
* @param imagePath - Screenshot path
|
|
1157
|
+
* @param prompt - Evaluation prompt
|
|
1158
|
+
* @param context - Validation context
|
|
1159
|
+
* @returns Cached ValidationResult or null if not cached
|
|
1160
|
+
*/
|
|
464
1161
|
export function getCached(imagePath: string, prompt: string, context?: ValidationContext): ValidationResult | null;
|
|
1162
|
+
|
|
1163
|
+
/**
|
|
1164
|
+
* Cache validation result.
|
|
1165
|
+
*
|
|
1166
|
+
* @param imagePath - Screenshot path
|
|
1167
|
+
* @param prompt - Evaluation prompt
|
|
1168
|
+
* @param context - Validation context
|
|
1169
|
+
* @param result - Validation result to cache
|
|
1170
|
+
*/
|
|
465
1171
|
export function setCached(
|
|
466
1172
|
imagePath: string,
|
|
467
1173
|
prompt: string,
|
|
468
1174
|
context: ValidationContext,
|
|
469
1175
|
result: ValidationResult
|
|
470
1176
|
): void;
|
|
1177
|
+
|
|
1178
|
+
/**
|
|
1179
|
+
* Clear all cached results.
|
|
1180
|
+
*/
|
|
471
1181
|
export function clearCache(): void;
|
|
1182
|
+
|
|
1183
|
+
/**
|
|
1184
|
+
* Get cache statistics.
|
|
1185
|
+
*
|
|
1186
|
+
* @returns Cache stats (hits, misses, size, hit rate)
|
|
1187
|
+
*
|
|
1188
|
+
* @example
|
|
1189
|
+
* ```typescript
|
|
1190
|
+
* const stats = getCacheStats();
|
|
1191
|
+
* console.log(`Hit rate: ${stats.hitRate * 100}%`); // 85%
|
|
1192
|
+
* console.log(`Cache size: ${stats.size}`); // 123
|
|
1193
|
+
* ```
|
|
1194
|
+
*/
|
|
472
1195
|
export function getCacheStats(): CacheStats;
|
|
473
1196
|
|
|
474
1197
|
// Config Functions
|
|
@@ -507,10 +1230,58 @@ export class ScoreTracker {
|
|
|
507
1230
|
}
|
|
508
1231
|
|
|
509
1232
|
// BatchOptimizer Class
|
|
1233
|
+
/**
|
|
1234
|
+
* Batch Optimizer
|
|
1235
|
+
*
|
|
1236
|
+
* Optimizes validation of multiple screenshots by batching requests,
|
|
1237
|
+
* managing concurrency, and caching results.
|
|
1238
|
+
*
|
|
1239
|
+
* **Use when:** You need to validate multiple screenshots efficiently.
|
|
1240
|
+
*
|
|
1241
|
+
* @example
|
|
1242
|
+
* ```typescript
|
|
1243
|
+
* const optimizer = new BatchOptimizer({
|
|
1244
|
+
* maxConcurrency: 5,
|
|
1245
|
+
* batchSize: 10,
|
|
1246
|
+
* cacheEnabled: true
|
|
1247
|
+
* });
|
|
1248
|
+
*
|
|
1249
|
+
* const results = await optimizer.batchValidate(
|
|
1250
|
+
* ['screenshot1.png', 'screenshot2.png', 'screenshot3.png'],
|
|
1251
|
+
* 'Evaluate accessibility'
|
|
1252
|
+
* );
|
|
1253
|
+
*
|
|
1254
|
+
* console.log(results.length); // 3
|
|
1255
|
+
* ```
|
|
1256
|
+
*/
|
|
510
1257
|
export class BatchOptimizer {
|
|
1258
|
+
/**
|
|
1259
|
+
* Create a new Batch Optimizer instance.
|
|
1260
|
+
*
|
|
1261
|
+
* @param options - Optimizer options (maxConcurrency, batchSize, cacheEnabled)
|
|
1262
|
+
*/
|
|
511
1263
|
constructor(options?: { maxConcurrency?: number; batchSize?: number; cacheEnabled?: boolean });
|
|
1264
|
+
|
|
1265
|
+
/**
|
|
1266
|
+
* Validate multiple screenshots in batch.
|
|
1267
|
+
*
|
|
1268
|
+
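* @param imagePaths - Single path, array of paths, or array of arrays for comparison (NOTE: the declared type `string | string[]` does not admit nested arrays — confirm against the implementation)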
|
|
1269
|
+
* @param prompt - Evaluation prompt
|
|
1270
|
+
* @param context - Optional validation context
|
|
1271
|
+
* @returns Promise resolving to array of ValidationResults
|
|
1272
|
+
*/
|
|
512
1273
|
batchValidate(imagePaths: string | string[], prompt: string, context?: ValidationContext): Promise<ValidationResult[]>;
|
|
1274
|
+
|
|
1275
|
+
/**
|
|
1276
|
+
* Clear batch optimizer cache.
|
|
1277
|
+
*/
|
|
513
1278
|
clearCache(): void;
|
|
1279
|
+
|
|
1280
|
+
/**
|
|
1281
|
+
* Get cache statistics.
|
|
1282
|
+
*
|
|
1283
|
+
* @returns Cache stats (size, queue length, active requests)
|
|
1284
|
+
*/
|
|
514
1285
|
getCacheStats(): { cacheSize: number; queueLength: number; activeRequests: number };
|
|
515
1286
|
}
|
|
516
1287
|
|
|
@@ -775,18 +1546,76 @@ export interface StateValidationResult<T = unknown> extends ValidationResult {
|
|
|
775
1546
|
matches: boolean;
|
|
776
1547
|
}
|
|
777
1548
|
|
|
1549
|
+
/**
|
|
1550
|
+
* State Validator
|
|
1551
|
+
*
|
|
1552
|
+
* Validates that visual state matches expected state using VLLM extraction.
|
|
1553
|
+
* Extracts state from screenshot and compares with expected state.
|
|
1554
|
+
*
|
|
1555
|
+
* **Use when:** You need to verify specific state values (cart count, button text, etc.)
|
|
1556
|
+
*
|
|
1557
|
+
* @example
|
|
1558
|
+
* ```typescript
|
|
1559
|
+
* const validator = new StateValidator();
|
|
1560
|
+
*
|
|
1561
|
+
* const result = await validator.validateState(
|
|
1562
|
+
* 'checkout.png',
|
|
1563
|
+
* {
|
|
1564
|
+
* cartCount: 1,
|
|
1565
|
+
* buttonText: 'Checkout'
|
|
1566
|
+
* },
|
|
1567
|
+
* {
|
|
1568
|
+
* testType: 'cart-state'
|
|
1569
|
+
* }
|
|
1570
|
+
* );
|
|
1571
|
+
*
|
|
1572
|
+
* console.log(result.matches); // true/false
|
|
1573
|
+
* console.log(result.discrepancies); // ['cartCount: expected 1, got 2']
|
|
1574
|
+
* ```
|
|
1575
|
+
*/
|
|
778
1576
|
export class StateValidator<T = unknown> {
|
|
1577
|
+
/**
|
|
1578
|
+
* Create a new State Validator instance.
|
|
1579
|
+
*
|
|
1580
|
+
* @param options - Validator options (tolerance, state extractor, etc.)
|
|
1581
|
+
*/
|
|
779
1582
|
constructor(options?: StateValidatorOptions<T>);
|
|
1583
|
+
|
|
1584
|
+
/**
|
|
1585
|
+
* Validate state (static method).
|
|
1586
|
+
*
|
|
1587
|
+
* @param screenshotPath - Path to screenshot or array for comparison
|
|
1588
|
+
* @param expectedState - Expected state object
|
|
1589
|
+
* @param options - Validation options
|
|
1590
|
+
* @returns Promise resolving to StateValidationResult
|
|
1591
|
+
*/
|
|
780
1592
|
static validate<T = unknown>(
|
|
781
1593
|
screenshotPath: string | string[],
|
|
782
1594
|
expectedState: T,
|
|
783
1595
|
options?: StateValidationOptions<T>
|
|
784
1596
|
): Promise<StateValidationResult<T>>;
|
|
1597
|
+
|
|
1598
|
+
/**
|
|
1599
|
+
* Validate state matches expected state.
|
|
1600
|
+
*
|
|
1601
|
+
* @param screenshotPath - Path to screenshot or array for comparison
|
|
1602
|
+
* @param expectedState - Expected state object
|
|
1603
|
+
* @param options - Validation options
|
|
1604
|
+
* @returns Promise resolving to StateValidationResult
|
|
1605
|
+
*/
|
|
785
1606
|
validateState(
|
|
786
1607
|
screenshotPath: string | string[],
|
|
787
1608
|
expectedState: T,
|
|
788
1609
|
options?: StateValidationOptions<T>
|
|
789
1610
|
): Promise<StateValidationResult<T>>;
|
|
1611
|
+
|
|
1612
|
+
/**
|
|
1613
|
+
* Build state validation prompt.
|
|
1614
|
+
*
|
|
1615
|
+
* @param expectedState - Expected state object
|
|
1616
|
+
* @param options - Validation options
|
|
1617
|
+
* @returns Validation prompt string
|
|
1618
|
+
*/
|
|
790
1619
|
buildStatePrompt(expectedState: T, options?: StateValidationOptions<T>): string;
|
|
791
1620
|
}
|
|
792
1621
|
|
|
@@ -820,22 +1649,90 @@ export interface AccessibilityResult extends ValidationResult {
|
|
|
820
1649
|
standards: string[];
|
|
821
1650
|
}
|
|
822
1651
|
|
|
1652
|
+
/**
|
|
1653
|
+
* Accessibility Validator
|
|
1654
|
+
*
|
|
1655
|
+
* Validates accessibility using VLLM semantic evaluation.
|
|
1656
|
+
* Checks contrast, labels, keyboard navigation, error messages, and WCAG compliance.
|
|
1657
|
+
*
|
|
1658
|
+
* **Use when:** You need comprehensive accessibility validation beyond programmatic checks.
|
|
1659
|
+
*
|
|
1660
|
+
* @example
|
|
1661
|
+
* ```typescript
|
|
1662
|
+
* const validator = new AccessibilityValidator({
|
|
1663
|
+
* minContrast: 4.5,
|
|
1664
|
+
* standards: ['WCAG-AA']
|
|
1665
|
+
* });
|
|
1666
|
+
*
|
|
1667
|
+
* const result = await validator.validateAccessibility(
|
|
1668
|
+
* 'payment-form.png',
|
|
1669
|
+
* {
|
|
1670
|
+
* testType: 'accessibility'
|
|
1671
|
+
* }
|
|
1672
|
+
* );
|
|
1673
|
+
*
|
|
1674
|
+
* console.log(result.passes); // true/false
|
|
1675
|
+
* console.log(result.violations.zeroTolerance); // Critical violations
|
|
1676
|
+
* ```
|
|
1677
|
+
*/
|
|
823
1678
|
export class AccessibilityValidator {
|
|
1679
|
+
/**
|
|
1680
|
+
* Create a new Accessibility Validator instance.
|
|
1681
|
+
*
|
|
1682
|
+
* @param options - Validator options (minContrast, standards, etc.)
|
|
1683
|
+
*/
|
|
824
1684
|
constructor(options?: AccessibilityValidatorOptions);
|
|
1685
|
+
|
|
1686
|
+
/**
|
|
1687
|
+
* Validate accessibility (static method).
|
|
1688
|
+
*
|
|
1689
|
+
* @param screenshotPath - Path to screenshot or array for comparison
|
|
1690
|
+
* @param options - Validation options
|
|
1691
|
+
* @returns Promise resolving to AccessibilityResult
|
|
1692
|
+
*/
|
|
825
1693
|
static validate(
|
|
826
1694
|
screenshotPath: string | string[],
|
|
827
1695
|
options?: AccessibilityOptions
|
|
828
1696
|
): Promise<AccessibilityResult>;
|
|
1697
|
+
|
|
1698
|
+
/**
|
|
1699
|
+
* Validate accessibility of screenshot.
|
|
1700
|
+
*
|
|
1701
|
+
* @param screenshotPath - Path to screenshot or array for comparison
|
|
1702
|
+
* @param options - Validation options
|
|
1703
|
+
* @returns Promise resolving to AccessibilityResult
|
|
1704
|
+
*/
|
|
829
1705
|
validateAccessibility(
|
|
830
1706
|
screenshotPath: string | string[],
|
|
831
1707
|
options?: AccessibilityOptions
|
|
832
1708
|
): Promise<AccessibilityResult>;
|
|
1709
|
+
|
|
1710
|
+
/**
|
|
1711
|
+
* Build accessibility validation prompt.
|
|
1712
|
+
*
|
|
1713
|
+
* @param options - Validation options
|
|
1714
|
+
* @returns Validation prompt string
|
|
1715
|
+
*/
|
|
833
1716
|
buildAccessibilityPrompt(options?: AccessibilityOptions): string;
|
|
1717
|
+
|
|
1718
|
+
/**
|
|
1719
|
+
* Detect accessibility violations from validation result.
|
|
1720
|
+
*
|
|
1721
|
+
* @param result - Validation result
|
|
1722
|
+
* @returns Categorized violations (zeroTolerance, critical, warnings)
|
|
1723
|
+
*/
|
|
834
1724
|
detectViolations(result: ValidationResult): {
|
|
835
1725
|
zeroTolerance: string[];
|
|
836
1726
|
critical: string[];
|
|
837
1727
|
warnings: string[];
|
|
838
1728
|
};
|
|
1729
|
+
|
|
1730
|
+
/**
|
|
1731
|
+
* Extract contrast information from validation result.
|
|
1732
|
+
*
|
|
1733
|
+
* @param result - Validation result
|
|
1734
|
+
* @returns Contrast ratios and compliance status
|
|
1735
|
+
*/
|
|
839
1736
|
extractContrastInfo(result: ValidationResult): {
|
|
840
1737
|
ratios: string[];
|
|
841
1738
|
minRatio: number | null;
|