@arclabs561/ai-visual-test 0.5.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +102 -11
- package/DEPLOYMENT.md +225 -9
- package/README.md +71 -80
- package/index.d.ts +862 -3
- package/package.json +10 -51
- package/src/batch-optimizer.mjs +39 -0
- package/src/cache.mjs +241 -16
- package/src/config.mjs +33 -91
- package/src/constants.mjs +54 -0
- package/src/convenience.mjs +113 -10
- package/src/cost-optimization.mjs +1 -0
- package/src/cost-tracker.mjs +134 -2
- package/src/data-extractor.mjs +36 -7
- package/src/dynamic-few-shot.mjs +69 -11
- package/src/errors.mjs +6 -2
- package/src/experience-propagation.mjs +12 -0
- package/src/experience-tracer.mjs +12 -3
- package/src/game-player.mjs +222 -43
- package/src/graceful-shutdown.mjs +126 -0
- package/src/helpers/playwright.mjs +22 -8
- package/src/human-validation-manager.mjs +99 -2
- package/src/index.mjs +48 -3
- package/src/integrations/playwright.mjs +140 -0
- package/src/judge.mjs +697 -24
- package/src/load-env.mjs +2 -1
- package/src/logger.mjs +31 -3
- package/src/model-tier-selector.mjs +1 -221
- package/src/natural-language-specs.mjs +31 -3
- package/src/persona-enhanced.mjs +4 -2
- package/src/persona-experience.mjs +1 -1
- package/src/pricing.mjs +28 -0
- package/src/prompt-composer.mjs +162 -5
- package/src/provider-data.mjs +115 -0
- package/src/render-change-detector.mjs +5 -0
- package/src/research-enhanced-validation.mjs +7 -5
- package/src/retry.mjs +21 -7
- package/src/rubrics.mjs +4 -0
- package/src/safe-logger.mjs +71 -0
- package/src/session-cost-tracker.mjs +320 -0
- package/src/smart-validator.mjs +8 -8
- package/src/spec-templates.mjs +52 -6
- package/src/startup-validation.mjs +127 -0
- package/src/temporal-adaptive.mjs +2 -2
- package/src/temporal-decision-manager.mjs +1 -271
- package/src/temporal-logic.mjs +104 -0
- package/src/temporal-note-pruner.mjs +119 -0
- package/src/temporal-preprocessor.mjs +1 -543
- package/src/temporal.mjs +681 -79
- package/src/utils/action-hallucination-detector.mjs +301 -0
- package/src/utils/baseline-validator.mjs +82 -0
- package/src/utils/cache-stats.mjs +104 -0
- package/src/utils/cached-llm.mjs +164 -0
- package/src/utils/capability-stratifier.mjs +108 -0
- package/src/utils/counterfactual-tester.mjs +83 -0
- package/src/utils/error-recovery.mjs +117 -0
- package/src/utils/explainability-scorer.mjs +119 -0
- package/src/utils/exploratory-automation.mjs +131 -0
- package/src/utils/index.mjs +10 -0
- package/src/utils/intent-recognizer.mjs +201 -0
- package/src/utils/log-sanitizer.mjs +165 -0
- package/src/utils/path-validator.mjs +88 -0
- package/src/utils/performance-logger.mjs +316 -0
- package/src/utils/performance-measurement.mjs +280 -0
- package/src/utils/prompt-sanitizer.mjs +213 -0
- package/src/utils/rate-limiter.mjs +144 -0
- package/src/validation-framework.mjs +24 -20
- package/src/validation-result-normalizer.mjs +27 -1
- package/src/validation.mjs +75 -25
- package/src/validators/accessibility-validator.mjs +144 -0
- package/src/validators/hybrid-validator.mjs +48 -4
- package/api/health.js +0 -34
- package/api/validate.js +0 -252
- package/public/index.html +0 -149
- package/vercel.json +0 -27
package/index.d.ts
CHANGED
|
@@ -210,26 +210,126 @@ export interface EnsembleResult {
|
|
|
210
210
|
votingMethod: string;
|
|
211
211
|
}
|
|
212
212
|
|
|
213
|
+
/**
|
|
214
|
+
* Ensemble Judge
|
|
215
|
+
*
|
|
216
|
+
* Uses multiple LLM providers to evaluate screenshots and aggregates results
|
|
217
|
+
* for improved accuracy (10-20% improvement with 3+ models).
|
|
218
|
+
*
|
|
219
|
+
* **Research:** Based on arXiv:2510.01499 - "Optimal LLM Aggregation"
|
|
220
|
+
*
|
|
221
|
+
* **Use when:** You need maximum reliability for critical evaluations
|
|
222
|
+
* (accessibility, quality checks, design validation).
|
|
223
|
+
*
|
|
224
|
+
* @example
|
|
225
|
+
* ```typescript
|
|
226
|
+
* const judge = new EnsembleJudge({
|
|
227
|
+
* judges: [
|
|
228
|
+
* new VLLMJudge({ provider: 'gemini' }),
|
|
229
|
+
* new VLLMJudge({ provider: 'openai' }),
|
|
230
|
+
* new VLLMJudge({ provider: 'claude' })
|
|
231
|
+
* ],
|
|
232
|
+
* votingMethod: 'weighted_average'
|
|
233
|
+
* });
|
|
234
|
+
*
|
|
235
|
+
* const result = await judge.evaluate(
|
|
236
|
+
* 'screenshot.png',
|
|
237
|
+
* 'Evaluate accessibility'
|
|
238
|
+
* );
|
|
239
|
+
*
|
|
240
|
+
* console.log(result.score); // Aggregated score
|
|
241
|
+
* console.log(result.agreement.score); // How much models agree
|
|
242
|
+
* ```
|
|
243
|
+
*/
|
|
213
244
|
export class EnsembleJudge {
|
|
245
|
+
/**
|
|
246
|
+
* Create a new Ensemble Judge instance.
|
|
247
|
+
*
|
|
248
|
+
* @param options - Ensemble options (judges, voting method, weights, etc.)
|
|
249
|
+
*/
|
|
214
250
|
constructor(options?: EnsembleJudgeOptions);
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* Evaluate screenshot using multiple judges and aggregate results.
|
|
254
|
+
*
|
|
255
|
+
* @param imagePath - Path to screenshot
|
|
256
|
+
* @param prompt - Evaluation prompt
|
|
257
|
+
* @param context - Optional validation context
|
|
258
|
+
* @returns Promise resolving to EnsembleResult with aggregated score and agreement metrics
|
|
259
|
+
*/
|
|
215
260
|
evaluate(imagePath: string, prompt: string, context?: Record<string, unknown>): Promise<EnsembleResult>;
|
|
216
261
|
}
|
|
217
262
|
|
|
218
263
|
export function createEnsembleJudge(providers?: string[], options?: EnsembleJudgeOptions): EnsembleJudge;
|
|
219
264
|
|
|
220
265
|
// Core Types
|
|
266
|
+
/**
|
|
267
|
+
* Validation context for screenshot validation.
|
|
268
|
+
*
|
|
269
|
+
* Provides additional context to guide the AI evaluation, including test type,
|
|
270
|
+
* viewport information, game state, and optimization options.
|
|
271
|
+
*
|
|
272
|
+
* @example
|
|
273
|
+
* ```typescript
|
|
274
|
+
* const context: ValidationContext = {
|
|
275
|
+
* testType: 'accessibility',
|
|
276
|
+
* viewport: { width: 1920, height: 1080 },
|
|
277
|
+
* autoSelectTier: true,
|
|
278
|
+
* autoSelectProvider: true
|
|
279
|
+
* };
|
|
280
|
+
* ```
|
|
281
|
+
*/
|
|
221
282
|
export interface ValidationContext {
|
|
283
|
+
/** Test type identifier (e.g., 'accessibility', 'payment-screen', 'gameplay') */
|
|
222
284
|
testType?: string;
|
|
285
|
+
/** Viewport dimensions for context-aware evaluation */
|
|
223
286
|
viewport?: { width: number; height: number };
|
|
287
|
+
/** Game state or application state for context */
|
|
224
288
|
gameState?: Record<string, unknown>;
|
|
289
|
+
/** Enable caching (default: true) */
|
|
225
290
|
useCache?: boolean;
|
|
291
|
+
/** Request timeout in milliseconds */
|
|
226
292
|
timeout?: number;
|
|
293
|
+
/** Use explicit rubric for consistent scoring */
|
|
227
294
|
useRubric?: boolean;
|
|
295
|
+
/** Include dimension scores in evaluation */
|
|
228
296
|
includeDimensions?: boolean;
|
|
297
|
+
/** URL of the page being tested */
|
|
229
298
|
url?: string;
|
|
299
|
+
/** Description of the test scenario */
|
|
230
300
|
description?: string;
|
|
301
|
+
/** Current step in multi-step test */
|
|
231
302
|
step?: string;
|
|
303
|
+
/** Custom prompt builder function */
|
|
232
304
|
promptBuilder?: (prompt: string, context: ValidationContext) => string;
|
|
305
|
+
/** Auto-select model tier (fast/balanced/best) based on context */
|
|
306
|
+
autoSelectTier?: boolean;
|
|
307
|
+
/** Auto-select provider (cheapest available) */
|
|
308
|
+
autoSelectProvider?: boolean;
|
|
309
|
+
/** Include cost comparison in results */
|
|
310
|
+
includeCostComparison?: boolean;
|
|
311
|
+
/** Frequency for high-frequency validation (Hz) */
|
|
312
|
+
frequency?: number;
|
|
313
|
+
/** Cost sensitivity flag for optimization */
|
|
314
|
+
costSensitive?: boolean;
|
|
315
|
+
/** Criticality level (low/medium/high/critical) */
|
|
316
|
+
criticality?: 'low' | 'medium' | 'high' | 'critical';
|
|
317
|
+
/** Model tier to use (fast/balanced/best) */
|
|
318
|
+
modelTier?: 'fast' | 'balanced' | 'best';
|
|
319
|
+
/** Enable temporal decision making (for high-frequency validation) */
|
|
320
|
+
useTemporalDecision?: boolean;
|
|
321
|
+
/** Temporal notes for decision context */
|
|
322
|
+
temporalNotes?: TemporalNote[];
|
|
323
|
+
/** Current state for temporal decision */
|
|
324
|
+
currentState?: Record<string, unknown>;
|
|
325
|
+
/** Previous state for temporal decision */
|
|
326
|
+
previousState?: Record<string, unknown>;
|
|
327
|
+
/** Previous result for temporal decision */
|
|
328
|
+
previousResult?: ValidationResult;
|
|
329
|
+
/** Temporal decision manager options */
|
|
330
|
+
temporalDecisionOptions?: Record<string, unknown>;
|
|
331
|
+
/** Per-call visual anchors (appended to config-level anchors) */
|
|
332
|
+
anchors?: VisualAnchors | null;
|
|
233
333
|
}
|
|
234
334
|
|
|
235
335
|
export interface EstimatedCost {
|
|
@@ -250,25 +350,121 @@ export interface SemanticInfo {
|
|
|
250
350
|
zeroToleranceViolations?: string[];
|
|
251
351
|
}
|
|
252
352
|
|
|
353
|
+
/**
|
|
354
|
+
* Result of screenshot validation.
|
|
355
|
+
*
|
|
356
|
+
* Contains the AI's evaluation of the screenshot, including score, issues,
|
|
357
|
+
* reasoning, and metadata about the validation process.
|
|
358
|
+
*
|
|
359
|
+
* @example
|
|
360
|
+
* ```typescript
|
|
361
|
+
* const result: ValidationResult = {
|
|
362
|
+
* enabled: true,
|
|
363
|
+
* provider: 'gemini',
|
|
364
|
+
* score: 8.5,
|
|
365
|
+
* issues: ['Low contrast on submit button'],
|
|
366
|
+
* assessment: 'Good',
|
|
367
|
+
* reasoning: 'The form is mostly accessible...',
|
|
368
|
+
* estimatedCost: { totalCost: '0.000123', currency: 'USD' },
|
|
369
|
+
* responseTime: 1234,
|
|
370
|
+
* cached: false
|
|
371
|
+
* };
|
|
372
|
+
* ```
|
|
373
|
+
*/
|
|
253
374
|
export interface ValidationResult {
|
|
375
|
+
/** Whether validation was enabled (false if API key missing) */
|
|
254
376
|
enabled: boolean;
|
|
377
|
+
/** LLM provider used (gemini, openai, claude, groq) */
|
|
255
378
|
provider: string;
|
|
379
|
+
/** Quality score (0-10, null if validation failed) */
|
|
256
380
|
score: number | null;
|
|
381
|
+
/** List of issues found */
|
|
257
382
|
issues: string[];
|
|
383
|
+
/** Overall assessment (e.g., 'Good', 'Needs Improvement') */
|
|
258
384
|
assessment: string | null;
|
|
385
|
+
/** Detailed reasoning for the score */
|
|
259
386
|
reasoning: string;
|
|
387
|
+
/** Estimated API cost breakdown */
|
|
260
388
|
estimatedCost?: EstimatedCost | null;
|
|
389
|
+
/** Response time in milliseconds */
|
|
261
390
|
responseTime: number;
|
|
391
|
+
/** Whether result was served from cache */
|
|
262
392
|
cached?: boolean;
|
|
393
|
+
/** Raw judgment text from LLM */
|
|
263
394
|
judgment?: string;
|
|
395
|
+
/** Raw API response */
|
|
264
396
|
raw?: unknown;
|
|
397
|
+
/** Extracted semantic information */
|
|
265
398
|
semantic?: SemanticInfo;
|
|
399
|
+
/** Error message if validation failed */
|
|
266
400
|
error?: string;
|
|
401
|
+
/** Status message */
|
|
267
402
|
message?: string;
|
|
403
|
+
/** Provider pricing information */
|
|
268
404
|
pricing?: { input: number; output: number };
|
|
405
|
+
/** Timestamp of validation */
|
|
269
406
|
timestamp?: string;
|
|
407
|
+
/** Test name if provided */
|
|
270
408
|
testName?: string;
|
|
409
|
+
/** Viewport dimensions if provided */
|
|
271
410
|
viewport?: { width: number; height: number } | null;
|
|
411
|
+
/** Cost comparison information (if includeCostComparison enabled) */
|
|
412
|
+
costComparison?: {
|
|
413
|
+
current: { tier: string; provider: string; cost: number };
|
|
414
|
+
tiers: Record<string, number>;
|
|
415
|
+
savings: Record<string, { absolute: number; percent: number; cost: number }>;
|
|
416
|
+
recommendation: { tier: string; cost: number; savings: number; savingsPercent: number; reason: string };
|
|
417
|
+
};
|
|
418
|
+
/** Whether temporal decision skipped this call */
|
|
419
|
+
skipped?: boolean;
|
|
420
|
+
/** Reason for skipping (if skipped) */
|
|
421
|
+
skipReason?: string;
|
|
422
|
+
/** Urgency level (if temporal decision used) */
|
|
423
|
+
urgency?: 'low' | 'medium' | 'high';
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
/**
|
|
427
|
+
* A single visual anchor: either a plain text string or an object
|
|
428
|
+
* with optional dimension scoping and/or an image reference.
|
|
429
|
+
*
|
|
430
|
+
* Plain string: `"Card images large enough to see art"`
|
|
431
|
+
* With dimension: `{ text: "Card images large", dimension: "card_presentation" }`
|
|
432
|
+
* Image ref: `{ image: "/path/to/good.png", label: "Well-themed Magic layout" }`
|
|
433
|
+
* Image + dimension: `{ image: "/path/to/good.png", label: "...", dimension: "game_authenticity" }`
|
|
434
|
+
*
|
|
435
|
+
* Images accept a file path or a data URI (`data:image/png;base64,...`).
|
|
436
|
+
*/
|
|
437
|
+
export type AnchorEntry = string | {
|
|
438
|
+
/** Text description of the anchor signal */
|
|
439
|
+
text?: string;
|
|
440
|
+
/** File path or data URI of a reference screenshot */
|
|
441
|
+
image?: string;
|
|
442
|
+
/** Short label for the image (shown in prompt) */
|
|
443
|
+
label?: string;
|
|
444
|
+
/** Rubric dimension this anchor relates to (e.g., "game_authenticity") */
|
|
445
|
+
dimension?: string;
|
|
446
|
+
};
|
|
447
|
+
|
|
448
|
+
/**
|
|
449
|
+
* Domain-level visual anchors for VLM evaluation grounding.
|
|
450
|
+
*
|
|
451
|
+
* Text anchors describe what to look for / flag in words.
|
|
452
|
+
* Image anchors provide reference screenshots as few-shot visual examples
|
|
453
|
+
* so the VLM can calibrate against concrete good/bad instances.
|
|
454
|
+
*
|
|
455
|
+
* Anchors can optionally be scoped to rubric dimensions via the
|
|
456
|
+
* `dimension` field on AnchorEntry objects.
|
|
457
|
+
*
|
|
458
|
+
* Set once in config for the project; per-call anchors in
|
|
459
|
+
* ValidationContext append to (not replace) config-level anchors.
|
|
460
|
+
*/
|
|
461
|
+
export interface VisualAnchors {
|
|
462
|
+
/** Brief domain description injected as context (e.g., "Card game search UI for TCG players") */
|
|
463
|
+
domain?: string;
|
|
464
|
+
/** Positive signals the VLM should look for (text and/or image entries) */
|
|
465
|
+
positive?: AnchorEntry[];
|
|
466
|
+
/** Negative signals the VLM should flag (text and/or image entries) */
|
|
467
|
+
negative?: AnchorEntry[];
|
|
272
468
|
}
|
|
273
469
|
|
|
274
470
|
export interface ConfigOptions {
|
|
@@ -280,6 +476,8 @@ export interface ConfigOptions {
|
|
|
280
476
|
maxConcurrency?: number;
|
|
281
477
|
timeout?: number;
|
|
282
478
|
verbose?: boolean;
|
|
479
|
+
/** Domain-level visual anchors included in every evaluation prompt */
|
|
480
|
+
anchors?: VisualAnchors | null;
|
|
283
481
|
}
|
|
284
482
|
|
|
285
483
|
export interface Config {
|
|
@@ -294,6 +492,8 @@ export interface Config {
|
|
|
294
492
|
priority: number;
|
|
295
493
|
};
|
|
296
494
|
enabled: boolean;
|
|
495
|
+
/** Normalized visual anchors (null when none configured) */
|
|
496
|
+
anchors: VisualAnchors | null;
|
|
297
497
|
cache: {
|
|
298
498
|
enabled: boolean;
|
|
299
499
|
dir: string | null;
|
|
@@ -307,28 +507,183 @@ export interface Config {
|
|
|
307
507
|
};
|
|
308
508
|
}
|
|
309
509
|
|
|
310
|
-
|
|
510
|
+
/**
|
|
511
|
+
* VLLM Judge Class
|
|
512
|
+
*
|
|
513
|
+
* Core screenshot validation engine using Vision Language Models.
|
|
514
|
+
* Supports multiple providers (Gemini, OpenAI, Claude, Groq) with automatic
|
|
515
|
+
* selection, caching, and cost optimization.
|
|
516
|
+
*
|
|
517
|
+
* **Use when:** You need fine-grained control over validation or custom judge implementations.
|
|
518
|
+
* **Otherwise:** Use `validateScreenshot()` function for simpler API.
|
|
519
|
+
*
|
|
520
|
+
* @example
|
|
521
|
+
* ```typescript
|
|
522
|
+
* // Create custom judge instance
|
|
523
|
+
* const judge = new VLLMJudge({
|
|
524
|
+
* provider: 'gemini',
|
|
525
|
+
* apiKey: process.env.GEMINI_API_KEY,
|
|
526
|
+
* cacheEnabled: true
|
|
527
|
+
* });
|
|
528
|
+
*
|
|
529
|
+
* const result = await judge.judgeScreenshot(
|
|
530
|
+
* 'screenshot.png',
|
|
531
|
+
* 'Evaluate this page'
|
|
532
|
+
* );
|
|
533
|
+
* ```
|
|
534
|
+
*/
|
|
311
535
|
export class VLLMJudge {
|
|
536
|
+
/**
|
|
537
|
+
* Create a new VLLM Judge instance.
|
|
538
|
+
*
|
|
539
|
+
* @param options - Configuration options (provider, API key, cache, etc.)
|
|
540
|
+
*/
|
|
312
541
|
constructor(options?: ConfigOptions);
|
|
542
|
+
|
|
543
|
+
/** Current provider name (gemini, openai, claude, groq) */
|
|
313
544
|
provider: string;
|
|
545
|
+
/** API key for current provider */
|
|
314
546
|
apiKey: string | null;
|
|
547
|
+
/** Provider configuration (model, pricing, etc.) */
|
|
315
548
|
providerConfig: Config['providerConfig'];
|
|
549
|
+
/** Whether validation is enabled (false if API key missing) */
|
|
316
550
|
enabled: boolean;
|
|
317
551
|
|
|
552
|
+
/**
|
|
553
|
+
* Convert image file to base64 string for API.
|
|
554
|
+
*
|
|
555
|
+
* @param imagePath - Path to image file
|
|
556
|
+
* @returns Base64-encoded image string
|
|
557
|
+
* @throws {FileError} If file not found or invalid format
|
|
558
|
+
*/
|
|
318
559
|
imageToBase64(imagePath: string): string;
|
|
560
|
+
|
|
561
|
+
/**
|
|
562
|
+
* Build evaluation prompt with context.
|
|
563
|
+
*
|
|
564
|
+
* @param prompt - Base evaluation prompt
|
|
565
|
+
* @param context - Validation context
|
|
566
|
+
* @returns Enhanced prompt with context
|
|
567
|
+
*/
|
|
319
568
|
buildPrompt(prompt: string, context: ValidationContext): string;
|
|
569
|
+
|
|
570
|
+
/**
|
|
571
|
+
* Extract semantic information from judgment text.
|
|
572
|
+
*
|
|
573
|
+
* @param judgment - Judgment text or object
|
|
574
|
+
* @returns Structured semantic information
|
|
575
|
+
*/
|
|
320
576
|
extractSemanticInfo(judgment: string | object): SemanticInfo;
|
|
577
|
+
|
|
578
|
+
/**
|
|
579
|
+
* Estimate API cost for validation.
|
|
580
|
+
*
|
|
581
|
+
* @param data - API request/response data
|
|
582
|
+
* @param provider - Provider name
|
|
583
|
+
* @returns Estimated cost breakdown or null
|
|
584
|
+
*/
|
|
321
585
|
estimateCost(data: unknown, provider: string): EstimatedCost | null;
|
|
322
|
-
|
|
586
|
+
|
|
587
|
+
/**
|
|
588
|
+
* Judge a screenshot using VLLM.
|
|
589
|
+
*
|
|
590
|
+
* @param imagePath - Path to screenshot, or array of paths for comparison
|
|
591
|
+
* @param prompt - Evaluation prompt
|
|
592
|
+
* @param context - Optional validation context
|
|
593
|
+
* @returns Promise resolving to ValidationResult
|
|
594
|
+
*/
|
|
595
|
+
judgeScreenshot(imagePath: string | string[], prompt: string, context?: ValidationContext): Promise<ValidationResult>;
|
|
323
596
|
}
|
|
324
597
|
|
|
325
598
|
// Core Functions
|
|
599
|
+
/**
|
|
600
|
+
* Validate a screenshot using Vision Language Models (VLLM).
|
|
601
|
+
*
|
|
602
|
+
* This is the primary API function. It takes a screenshot and evaluation prompt,
|
|
603
|
+
* sends it to an AI model (Gemini, OpenAI, Claude, or Groq), and returns structured
|
|
604
|
+
* validation results with score, issues, and reasoning.
|
|
605
|
+
*
|
|
606
|
+
* **Key Features:**
|
|
607
|
+
* - Automatic provider selection (cheapest available)
|
|
608
|
+
* - Automatic tier selection (fast/balanced/best)
|
|
609
|
+
* - Built-in caching (7-day TTL)
|
|
610
|
+
* - Cost optimization
|
|
611
|
+
* - Temporal decision making (for high-frequency validation)
|
|
612
|
+
*
|
|
613
|
+
* @param imagePath - Path to screenshot file (PNG, JPEG, GIF, WebP) or array of paths for comparison
|
|
614
|
+
* @param prompt - Evaluation prompt (e.g., "Is this accessible?", "Check if payment form works")
|
|
615
|
+
* @param context - Optional validation context (testType, viewport, optimization options)
|
|
616
|
+
* @returns Promise resolving to ValidationResult with score, issues, reasoning, and metadata
|
|
617
|
+
*
|
|
618
|
+
* @example
|
|
619
|
+
* ```typescript
|
|
620
|
+
* // Basic usage
|
|
621
|
+
* const result = await validateScreenshot(
|
|
622
|
+
* 'screenshot.png',
|
|
623
|
+
* 'Check if this payment form is accessible'
|
|
624
|
+
* );
|
|
625
|
+
* console.log(result.score); // 8.5 (0-10 scale)
|
|
626
|
+
* console.log(result.issues); // ['Low contrast on button', 'Missing label']
|
|
627
|
+
* console.log(result.reasoning); // "The form is mostly accessible..."
|
|
628
|
+
* ```
|
|
629
|
+
*
|
|
630
|
+
* @example
|
|
631
|
+
* ```typescript
|
|
632
|
+
* // With cost optimization
|
|
633
|
+
* const result = await validateScreenshot(
|
|
634
|
+
* 'screenshot.png',
|
|
635
|
+
* 'Evaluate accessibility',
|
|
636
|
+
* {
|
|
637
|
+
* autoSelectTier: true,
|
|
638
|
+
* autoSelectProvider: true,
|
|
639
|
+
* includeCostComparison: true
|
|
640
|
+
* }
|
|
641
|
+
* );
|
|
642
|
+
* console.log(result.costComparison?.savings.fast?.percent); // 45% savings
|
|
643
|
+
* ```
|
|
644
|
+
*
|
|
645
|
+
* @example
|
|
646
|
+
* ```typescript
|
|
647
|
+
* // High-frequency validation (60Hz)
|
|
648
|
+
* const result = await validateScreenshot(
|
|
649
|
+
* 'frame.png',
|
|
650
|
+
* 'Is the game playable?',
|
|
651
|
+
* {
|
|
652
|
+
* frequency: 60,
|
|
653
|
+
* autoSelectTier: true,
|
|
654
|
+
* useTemporalDecision: true
|
|
655
|
+
* }
|
|
656
|
+
* );
|
|
657
|
+
* ```
|
|
658
|
+
*
|
|
659
|
+
* @throws {FileError} If screenshot file not found or invalid format
|
|
660
|
+
* @throws {ValidationError} If validation fails
|
|
661
|
+
* @throws {ProviderError} If API provider error occurs
|
|
662
|
+
* @throws {TimeoutError} If request times out
|
|
663
|
+
*/
|
|
326
664
|
export function validateScreenshot(
|
|
327
|
-
imagePath: string,
|
|
665
|
+
imagePath: string | string[],
|
|
328
666
|
prompt: string,
|
|
329
667
|
context?: ValidationContext
|
|
330
668
|
): Promise<ValidationResult>;
|
|
331
669
|
|
|
670
|
+
/**
|
|
671
|
+
* Extract semantic information from VLLM judgment text.
|
|
672
|
+
*
|
|
673
|
+
* Parses AI judgment responses into structured data (score, issues, reasoning).
|
|
674
|
+
* Useful for custom implementations that need to parse judgment text.
|
|
675
|
+
*
|
|
676
|
+
* @param judgment - Judgment text or object from VLLM
|
|
677
|
+
* @returns Structured semantic information with score, issues, assessment, reasoning
|
|
678
|
+
*
|
|
679
|
+
* @example
|
|
680
|
+
* ```typescript
|
|
681
|
+
* const judgment = "Score: 8.5. Issues: Low contrast. Reasoning: The form is mostly accessible...";
|
|
682
|
+
* const info = extractSemanticInfo(judgment);
|
|
683
|
+
* console.log(info.score); // 8.5
|
|
684
|
+
* console.log(info.issues); // ['Low contrast']
|
|
685
|
+
* ```
|
|
686
|
+
*/
|
|
332
687
|
export function extractSemanticInfo(judgment: string | object): SemanticInfo;
|
|
333
688
|
|
|
334
689
|
// Multi-Modal Types
|
|
@@ -405,11 +760,33 @@ export function multiModalValidation(
|
|
|
405
760
|
}>;
|
|
406
761
|
|
|
407
762
|
// Temporal Types
|
|
763
|
+
/**
|
|
764
|
+
* Temporal note for tracking state over time.
|
|
765
|
+
*
|
|
766
|
+
* Used in high-frequency validation (10-60Hz) to track observations
|
|
767
|
+
* and enable temporal decision making (reduces LLM calls by 98.5%).
|
|
768
|
+
*
|
|
769
|
+
* @example
|
|
770
|
+
* ```typescript
|
|
771
|
+
* const note: TemporalNote = {
|
|
772
|
+
* timestamp: Date.now(),
|
|
773
|
+
* elapsed: 100,
|
|
774
|
+
* score: 8.5,
|
|
775
|
+
* observation: 'Button clicked',
|
|
776
|
+
* step: 'checkout'
|
|
777
|
+
* };
|
|
778
|
+
* ```
|
|
779
|
+
*/
|
|
408
780
|
export interface TemporalNote {
|
|
781
|
+
/** Timestamp in milliseconds */
|
|
409
782
|
timestamp?: number;
|
|
783
|
+
/** Elapsed time since start in milliseconds */
|
|
410
784
|
elapsed?: number;
|
|
785
|
+
/** Quality score (0-10) */
|
|
411
786
|
score?: number;
|
|
787
|
+
/** Observation description */
|
|
412
788
|
observation?: string;
|
|
789
|
+
/** Step identifier */
|
|
413
790
|
step?: string;
|
|
414
791
|
}
|
|
415
792
|
|
|
@@ -437,6 +814,36 @@ export interface AggregatedTemporalNotes {
|
|
|
437
814
|
}
|
|
438
815
|
|
|
439
816
|
// Temporal Functions
|
|
817
|
+
/**
|
|
818
|
+
* Aggregate temporal notes into time windows with weighted scores.
|
|
819
|
+
*
|
|
820
|
+
* Used for high-frequency validation to reduce LLM calls by aggregating
|
|
821
|
+
* observations over time windows. Implements exponential decay weighting
|
|
822
|
+
* (recent notes weighted more heavily).
|
|
823
|
+
*
|
|
824
|
+
* **Research:** Inspired by arXiv:2505.17663 (DynToM) and arXiv:2507.15851
|
|
825
|
+
* (Human Temporal Cognition), adapted with exponential decay for practical use.
|
|
826
|
+
*
|
|
827
|
+
* @param notes - Array of temporal notes to aggregate
|
|
828
|
+
* @param options - Aggregation options
|
|
829
|
+
* @param options.windowSize - Time window size in milliseconds (default: 1000)
|
|
830
|
+
* @param options.decayFactor - Exponential decay factor (default: 0.9)
|
|
831
|
+
* @param options.coherenceThreshold - Coherence threshold for filtering (default: 0.5)
|
|
832
|
+
* @returns Aggregated notes with windows, summary, and coherence score
|
|
833
|
+
*
|
|
834
|
+
* @example
|
|
835
|
+
* ```typescript
|
|
836
|
+
* const notes: TemporalNote[] = [
|
|
837
|
+
* { timestamp: 0, score: 8, observation: 'Initial state' },
|
|
838
|
+
* { timestamp: 100, score: 8.5, observation: 'Button clicked' },
|
|
839
|
+
* { timestamp: 200, score: 9, observation: 'Form submitted' }
|
|
840
|
+
* ];
|
|
841
|
+
*
|
|
842
|
+
* const aggregated = aggregateTemporalNotes(notes);
|
|
843
|
+
* console.log(aggregated.coherence); // 0.92 (high coherence)
|
|
844
|
+
* console.log(aggregated.windows[0].avgScore); // 8.5
|
|
845
|
+
* ```
|
|
846
|
+
*/
|
|
440
847
|
export function aggregateTemporalNotes(
|
|
441
848
|
notes: TemporalNote[],
|
|
442
849
|
options?: {
|
|
@@ -450,6 +857,223 @@ export function formatNotesForPrompt(aggregated: AggregatedTemporalNotes): strin
|
|
|
450
857
|
|
|
451
858
|
export function calculateCoherence(windows: TemporalWindow[]): number;
|
|
452
859
|
|
|
860
|
+
/**
|
|
861
|
+
* Temporal Decision Manager
|
|
862
|
+
*
|
|
863
|
+
* Decides when to call the LLM versus reuse the previous result during high-frequency validation.
|
|
864
|
+
* Reduces LLM calls by 98.5% while maintaining accuracy through temporal coherence.
|
|
865
|
+
*
|
|
866
|
+
* **Research:** Based on arXiv:2406.12125 - "Efficient Sequential Decision Making with Large Language Models"
|
|
867
|
+
*
|
|
868
|
+
* **Core Insight:** Don't prompt on every state change; prompt when a decision is needed.
|
|
869
|
+
*
|
|
870
|
+
* **Note:** Implementation is obfuscated to protect proprietary algorithms, but API is fully documented.
|
|
871
|
+
*
|
|
872
|
+
* @example
|
|
873
|
+
* ```typescript
|
|
874
|
+
* const manager = new TemporalDecisionManager({
|
|
875
|
+
* minNotesForPrompt: 3,
|
|
876
|
+
* coherenceThreshold: 0.5,
|
|
877
|
+
* urgencyThreshold: 0.3
|
|
878
|
+
* });
|
|
879
|
+
*
|
|
880
|
+
* const decision = await manager.shouldPrompt(
|
|
881
|
+
* currentState,
|
|
882
|
+
* previousState,
|
|
883
|
+
* temporalNotes,
|
|
884
|
+
* context
|
|
885
|
+
* );
|
|
886
|
+
*
|
|
887
|
+
* if (decision.shouldPrompt) {
|
|
888
|
+
* // Call LLM
|
|
889
|
+
* } else {
|
|
890
|
+
* // Reuse previous result
|
|
891
|
+
* }
|
|
892
|
+
* ```
|
|
893
|
+
*/
|
|
894
|
+
export class TemporalDecisionManager {
|
|
895
|
+
/**
|
|
896
|
+
* Create a new Temporal Decision Manager.
|
|
897
|
+
*
|
|
898
|
+
* @param options - Decision manager options
|
|
899
|
+
* @param options.minNotesForPrompt - Minimum notes before prompting (default: 3)
|
|
900
|
+
* @param options.coherenceThreshold - Coherence threshold for prompting (default: 0.5)
|
|
901
|
+
* @param options.urgencyThreshold - Urgency threshold for prompting (default: 0.3)
|
|
902
|
+
* @param options.maxWaitTime - Maximum wait time in milliseconds before forcing a prompt (default: 10000)
|
|
903
|
+
* @param options.stateChangeThreshold - State change threshold for prompting (default: 0.2)
|
|
904
|
+
* @param options.warmStartSteps - Use LLM for first N steps (default: 10)
|
|
905
|
+
* @param options.adaptiveSampling - Enable adaptive sampling (default: true)
|
|
906
|
+
*/
|
|
907
|
+
constructor(options?: {
|
|
908
|
+
minNotesForPrompt?: number;
|
|
909
|
+
coherenceThreshold?: number;
|
|
910
|
+
urgencyThreshold?: number;
|
|
911
|
+
maxWaitTime?: number;
|
|
912
|
+
stateChangeThreshold?: number;
|
|
913
|
+
warmStartSteps?: number;
|
|
914
|
+
adaptiveSampling?: boolean;
|
|
915
|
+
});
|
|
916
|
+
|
|
917
|
+
/**
|
|
918
|
+
* Decide if we should prompt now or wait for more context.
|
|
919
|
+
*
|
|
920
|
+
* @param currentState - Current state object
|
|
921
|
+
* @param previousState - Previous state object (if any)
|
|
922
|
+
* @param temporalNotes - Array of temporal notes
|
|
923
|
+
* @param context - Additional context
|
|
924
|
+
* @returns Decision object with shouldPrompt, reason, and urgency
|
|
925
|
+
*/
|
|
926
|
+
shouldPrompt(
|
|
927
|
+
currentState: Record<string, unknown>,
|
|
928
|
+
previousState: Record<string, unknown> | null,
|
|
929
|
+
temporalNotes: TemporalNote[],
|
|
930
|
+
context?: Record<string, unknown>
|
|
931
|
+
): Promise<{
|
|
932
|
+
shouldPrompt: boolean;
|
|
933
|
+
reason: string;
|
|
934
|
+
urgency: 'low' | 'medium' | 'high';
|
|
935
|
+
}>;
|
|
936
|
+
|
|
937
|
+
/**
|
|
938
|
+
* Calculate state change magnitude.
|
|
939
|
+
*
|
|
940
|
+
* @param currentState - Current state
|
|
941
|
+
* @param previousState - Previous state
|
|
942
|
+
* @returns State change score (0-1)
|
|
943
|
+
*/
|
|
944
|
+
calculateStateChange(
|
|
945
|
+
currentState: Record<string, unknown>,
|
|
946
|
+
previousState: Record<string, unknown> | null
|
|
947
|
+
): number;
|
|
948
|
+
|
|
949
|
+
/**
|
|
950
|
+
* Check if current state is a decision point.
|
|
951
|
+
*
|
|
952
|
+
* @param currentState - Current state
|
|
953
|
+
* @param context - Additional context
|
|
954
|
+
* @returns True if decision point
|
|
955
|
+
*/
|
|
956
|
+
isDecisionPoint(
|
|
957
|
+
currentState: Record<string, unknown>,
|
|
958
|
+
context?: Record<string, unknown>
|
|
959
|
+
): boolean;
|
|
960
|
+
|
|
961
|
+
/**
|
|
962
|
+
* Check if there's a recent user action.
|
|
963
|
+
*
|
|
964
|
+
* @param temporalNotes - Array of temporal notes
|
|
965
|
+
* @param context - Additional context
|
|
966
|
+
* @returns True if recent user action detected
|
|
967
|
+
*/
|
|
968
|
+
hasRecentUserAction(
|
|
969
|
+
temporalNotes: TemporalNote[],
|
|
970
|
+
context?: Record<string, unknown>
|
|
971
|
+
): boolean;
|
|
972
|
+
}
|
|
973
|
+
|
|
974
|
+
/**
|
|
975
|
+
* Create a temporal decision manager with default options.
|
|
976
|
+
*
|
|
977
|
+
* @param options - Decision manager options
|
|
978
|
+
* @returns New TemporalDecisionManager instance
|
|
979
|
+
*/
|
|
980
|
+
export function createTemporalDecisionManager(options?: {
|
|
981
|
+
minNotesForPrompt?: number;
|
|
982
|
+
coherenceThreshold?: number;
|
|
983
|
+
urgencyThreshold?: number;
|
|
984
|
+
maxWaitTime?: number;
|
|
985
|
+
stateChangeThreshold?: number;
|
|
986
|
+
warmStartSteps?: number;
|
|
987
|
+
adaptiveSampling?: boolean;
|
|
988
|
+
}): TemporalDecisionManager;
|
|
989
|
+
|
|
990
|
+
/**
|
|
991
|
+
* Temporal Preprocessing Manager
|
|
992
|
+
*
|
|
993
|
+
* Optimizes temporal note processing for high-frequency validation (10-60Hz).
|
|
994
|
+
* Implements activity-based preprocessing patterns to reduce computational overhead.
|
|
995
|
+
*
|
|
996
|
+
* **Note:** Implementation is obfuscated to protect proprietary algorithms, but API is fully documented.
|
|
997
|
+
*
|
|
998
|
+
* @example
|
|
999
|
+
* ```typescript
|
|
1000
|
+
* const manager = new TemporalPreprocessingManager({
|
|
1001
|
+
* activityThreshold: 0.5,
|
|
1002
|
+
* highFrequencyMode: true
|
|
1003
|
+
* });
|
|
1004
|
+
*
|
|
1005
|
+
* const processed = await manager.preprocess(temporalNotes, context);
|
|
1006
|
+
* ```
|
|
1007
|
+
*/
|
|
1008
|
+
export class TemporalPreprocessingManager {
|
|
1009
|
+
/**
|
|
1010
|
+
* Create a new Temporal Preprocessing Manager.
|
|
1011
|
+
*
|
|
1012
|
+
* @param options - Preprocessing options
|
|
1013
|
+
*/
|
|
1014
|
+
constructor(options?: Record<string, unknown>);
|
|
1015
|
+
|
|
1016
|
+
/**
|
|
1017
|
+
* Preprocess temporal notes for efficient handling.
|
|
1018
|
+
*
|
|
1019
|
+
* @param notes - Array of temporal notes
|
|
1020
|
+
* @param context - Additional context
|
|
1021
|
+
* @returns Processed notes
|
|
1022
|
+
*/
|
|
1023
|
+
preprocess(
|
|
1024
|
+
notes: TemporalNote[],
|
|
1025
|
+
context?: Record<string, unknown>
|
|
1026
|
+
): Promise<TemporalNote[]>;
|
|
1027
|
+
}
|
|
1028
|
+
|
|
1029
|
+
/**
|
|
1030
|
+
* Adaptive Temporal Processor
|
|
1031
|
+
*
|
|
1032
|
+
* Adaptively processes temporal notes based on activity patterns.
|
|
1033
|
+
*
|
|
1034
|
+
* @example
|
|
1035
|
+
* ```typescript
|
|
1036
|
+
* const processor = new AdaptiveTemporalProcessor();
|
|
1037
|
+
* const processed = await processor.process(notes, context);
|
|
1038
|
+
* ```
|
|
1039
|
+
*/
|
|
1040
|
+
export class AdaptiveTemporalProcessor {
|
|
1041
|
+
/**
|
|
1042
|
+
* Create a new Adaptive Temporal Processor.
|
|
1043
|
+
*
|
|
1044
|
+
* @param options - Processor options
|
|
1045
|
+
*/
|
|
1046
|
+
constructor(options?: Record<string, unknown>);
|
|
1047
|
+
|
|
1048
|
+
/**
|
|
1049
|
+
* Process temporal notes adaptively.
|
|
1050
|
+
*
|
|
1051
|
+
* @param notes - Array of temporal notes
|
|
1052
|
+
* @param context - Additional context
|
|
1053
|
+
* @returns Processed notes
|
|
1054
|
+
*/
|
|
1055
|
+
process(
|
|
1056
|
+
notes: TemporalNote[],
|
|
1057
|
+
context?: Record<string, unknown>
|
|
1058
|
+
): Promise<TemporalNote[]>;
|
|
1059
|
+
}
|
|
1060
|
+
|
|
1061
|
+
/**
|
|
1062
|
+
* Create a temporal preprocessing manager with default options.
|
|
1063
|
+
*
|
|
1064
|
+
* @param options - Preprocessing options
|
|
1065
|
+
* @returns New TemporalPreprocessingManager instance
|
|
1066
|
+
*/
|
|
1067
|
+
export function createTemporalPreprocessingManager(options?: Record<string, unknown>): TemporalPreprocessingManager;
|
|
1068
|
+
|
|
1069
|
+
/**
|
|
1070
|
+
* Create an adaptive temporal processor with default options.
|
|
1071
|
+
*
|
|
1072
|
+
* @param options - Processor options
|
|
1073
|
+
* @returns New AdaptiveTemporalProcessor instance
|
|
1074
|
+
*/
|
|
1075
|
+
export function createAdaptiveTemporalProcessor(options?: Record<string, unknown>): AdaptiveTemporalProcessor;
|
|
1076
|
+
|
|
453
1077
|
// Cache Types
|
|
454
1078
|
export interface CacheStats {
|
|
455
1079
|
hits: number;
|
|
@@ -459,16 +1083,77 @@ export interface CacheStats {
|
|
|
459
1083
|
}
|
|
460
1084
|
|
|
461
1085
|
// Cache Functions
|
|
1086
|
+
/**
|
|
1087
|
+
* Initialize cache system.
|
|
1088
|
+
*
|
|
1089
|
+
* Sets up file-based caching with 7-day TTL. Cache persists across
|
|
1090
|
+
* process restarts and reduces API costs by serving cached results.
|
|
1091
|
+
*
|
|
1092
|
+
* @param cacheDir - Cache directory path (default: `.cache/ai-visual-test`)
|
|
1093
|
+
*
|
|
1094
|
+
* @example
|
|
1095
|
+
* ```typescript
|
|
1096
|
+
* initCache('/tmp/my-cache');
|
|
1097
|
+
* const result = await validateScreenshot('screenshot.png', 'Evaluate');
|
|
1098
|
+
* // Subsequent calls with same screenshot/prompt use cache
|
|
1099
|
+
* ```
|
|
1100
|
+
*/
|
|
462
1101
|
export function initCache(cacheDir?: string): void;
|
|
1102
|
+
|
|
1103
|
+
/**
|
|
1104
|
+
* Generate cache key for validation request.
|
|
1105
|
+
*
|
|
1106
|
+
* Creates SHA-256 hash of image path, prompt, and context for cache lookup.
|
|
1107
|
+
*
|
|
1108
|
+
* @param imagePath - Screenshot path
|
|
1109
|
+
* @param prompt - Evaluation prompt
|
|
1110
|
+
* @param context - Validation context
|
|
1111
|
+
* @returns Cache key string
|
|
1112
|
+
*/
|
|
463
1113
|
export function generateCacheKey(imagePath: string, prompt: string, context?: ValidationContext): string;
|
|
1114
|
+
|
|
1115
|
+
/**
|
|
1116
|
+
* Get cached validation result.
|
|
1117
|
+
*
|
|
1118
|
+
* @param imagePath - Screenshot path
|
|
1119
|
+
* @param prompt - Evaluation prompt
|
|
1120
|
+
* @param context - Validation context
|
|
1121
|
+
* @returns Cached ValidationResult or null if not cached
|
|
1122
|
+
*/
|
|
464
1123
|
export function getCached(imagePath: string, prompt: string, context?: ValidationContext): ValidationResult | null;
|
|
1124
|
+
|
|
1125
|
+
/**
|
|
1126
|
+
* Cache validation result.
|
|
1127
|
+
*
|
|
1128
|
+
* @param imagePath - Screenshot path
|
|
1129
|
+
* @param prompt - Evaluation prompt
|
|
1130
|
+
* @param context - Validation context
|
|
1131
|
+
* @param result - Validation result to cache
|
|
1132
|
+
*/
|
|
465
1133
|
export function setCached(
|
|
466
1134
|
imagePath: string,
|
|
467
1135
|
prompt: string,
|
|
468
1136
|
context: ValidationContext,
|
|
469
1137
|
result: ValidationResult
|
|
470
1138
|
): void;
|
|
1139
|
+
|
|
1140
|
+
/**
|
|
1141
|
+
* Clear all cached results.
|
|
1142
|
+
*/
|
|
471
1143
|
export function clearCache(): void;
|
|
1144
|
+
|
|
1145
|
+
/**
|
|
1146
|
+
* Get cache statistics.
|
|
1147
|
+
*
|
|
1148
|
+
* @returns Cache stats (hits, misses, size, hit rate)
|
|
1149
|
+
*
|
|
1150
|
+
* @example
|
|
1151
|
+
* ```typescript
|
|
1152
|
+
* const stats = getCacheStats();
|
|
1153
|
+
* console.log(`Hit rate: ${stats.hitRate * 100}%`); // 85%
|
|
1154
|
+
* console.log(`Cache size: ${stats.size}`); // 123
|
|
1155
|
+
* ```
|
|
1156
|
+
*/
|
|
472
1157
|
export function getCacheStats(): CacheStats;
|
|
473
1158
|
|
|
474
1159
|
// Config Functions
|
|
@@ -507,10 +1192,58 @@ export class ScoreTracker {
|
|
|
507
1192
|
}
|
|
508
1193
|
|
|
509
1194
|
// BatchOptimizer Class
|
|
1195
|
+
/**
|
|
1196
|
+
* Batch Optimizer
|
|
1197
|
+
*
|
|
1198
|
+
* Optimizes validation of multiple screenshots by batching requests,
|
|
1199
|
+
* managing concurrency, and caching results.
|
|
1200
|
+
*
|
|
1201
|
+
* **Use when:** You need to validate multiple screenshots efficiently.
|
|
1202
|
+
*
|
|
1203
|
+
* @example
|
|
1204
|
+
* ```typescript
|
|
1205
|
+
* const optimizer = new BatchOptimizer({
|
|
1206
|
+
* maxConcurrency: 5,
|
|
1207
|
+
* batchSize: 10,
|
|
1208
|
+
* cacheEnabled: true
|
|
1209
|
+
* });
|
|
1210
|
+
*
|
|
1211
|
+
* const results = await optimizer.batchValidate(
|
|
1212
|
+
* ['screenshot1.png', 'screenshot2.png', 'screenshot3.png'],
|
|
1213
|
+
* 'Evaluate accessibility'
|
|
1214
|
+
* );
|
|
1215
|
+
*
|
|
1216
|
+
* console.log(results.length); // 3
|
|
1217
|
+
* ```
|
|
1218
|
+
*/
|
|
510
1219
|
export class BatchOptimizer {
|
|
1220
|
+
/**
|
|
1221
|
+
* Create a new Batch Optimizer instance.
|
|
1222
|
+
*
|
|
1223
|
+
* @param options - Optimizer options (maxConcurrency, batchSize, cacheEnabled)
|
|
1224
|
+
*/
|
|
511
1225
|
constructor(options?: { maxConcurrency?: number; batchSize?: number; cacheEnabled?: boolean });
|
|
1226
|
+
|
|
1227
|
+
/**
|
|
1228
|
+
* Validate multiple screenshots in batch.
|
|
1229
|
+
*
|
|
1230
|
+
* @param imagePaths - Single path, array of paths, or array of arrays for comparison
|
|
1231
|
+
* @param prompt - Evaluation prompt
|
|
1232
|
+
* @param context - Optional validation context
|
|
1233
|
+
* @returns Promise resolving to array of ValidationResults
|
|
1234
|
+
*/
|
|
512
1235
|
batchValidate(imagePaths: string | string[], prompt: string, context?: ValidationContext): Promise<ValidationResult[]>;
|
|
1236
|
+
|
|
1237
|
+
/**
|
|
1238
|
+
* Clear batch optimizer cache.
|
|
1239
|
+
*/
|
|
513
1240
|
clearCache(): void;
|
|
1241
|
+
|
|
1242
|
+
/**
|
|
1243
|
+
* Get cache statistics.
|
|
1244
|
+
*
|
|
1245
|
+
* @returns Cache stats (size, queue length, active requests)
|
|
1246
|
+
*/
|
|
514
1247
|
getCacheStats(): { cacheSize: number; queueLength: number; activeRequests: number };
|
|
515
1248
|
}
|
|
516
1249
|
|
|
@@ -775,18 +1508,76 @@ export interface StateValidationResult<T = unknown> extends ValidationResult {
|
|
|
775
1508
|
matches: boolean;
|
|
776
1509
|
}
|
|
777
1510
|
|
|
1511
|
+
/**
|
|
1512
|
+
* State Validator
|
|
1513
|
+
*
|
|
1514
|
+
* Validates that visual state matches expected state using VLLM extraction.
|
|
1515
|
+
* Extracts state from screenshot and compares with expected state.
|
|
1516
|
+
*
|
|
1517
|
+
* **Use when:** You need to verify specific state values (cart count, button text, etc.)
|
|
1518
|
+
*
|
|
1519
|
+
* @example
|
|
1520
|
+
* ```typescript
|
|
1521
|
+
* const validator = new StateValidator();
|
|
1522
|
+
*
|
|
1523
|
+
* const result = await validator.validateState(
|
|
1524
|
+
* 'checkout.png',
|
|
1525
|
+
* {
|
|
1526
|
+
* cartCount: 1,
|
|
1527
|
+
* buttonText: 'Checkout'
|
|
1528
|
+
* },
|
|
1529
|
+
* {
|
|
1530
|
+
* testType: 'cart-state'
|
|
1531
|
+
* }
|
|
1532
|
+
* );
|
|
1533
|
+
*
|
|
1534
|
+
* console.log(result.matches); // true/false
|
|
1535
|
+
* console.log(result.discrepancies); // ['cartCount: expected 1, got 2']
|
|
1536
|
+
* ```
|
|
1537
|
+
*/
|
|
778
1538
|
export class StateValidator<T = unknown> {
|
|
1539
|
+
/**
|
|
1540
|
+
* Create a new State Validator instance.
|
|
1541
|
+
*
|
|
1542
|
+
* @param options - Validator options (tolerance, state extractor, etc.)
|
|
1543
|
+
*/
|
|
779
1544
|
constructor(options?: StateValidatorOptions<T>);
|
|
1545
|
+
|
|
1546
|
+
/**
|
|
1547
|
+
* Validate state (static method).
|
|
1548
|
+
*
|
|
1549
|
+
* @param screenshotPath - Path to screenshot or array for comparison
|
|
1550
|
+
* @param expectedState - Expected state object
|
|
1551
|
+
* @param options - Validation options
|
|
1552
|
+
* @returns Promise resolving to StateValidationResult
|
|
1553
|
+
*/
|
|
780
1554
|
static validate<T = unknown>(
|
|
781
1555
|
screenshotPath: string | string[],
|
|
782
1556
|
expectedState: T,
|
|
783
1557
|
options?: StateValidationOptions<T>
|
|
784
1558
|
): Promise<StateValidationResult<T>>;
|
|
1559
|
+
|
|
1560
|
+
/**
|
|
1561
|
+
* Validate state matches expected state.
|
|
1562
|
+
*
|
|
1563
|
+
* @param screenshotPath - Path to screenshot or array for comparison
|
|
1564
|
+
* @param expectedState - Expected state object
|
|
1565
|
+
* @param options - Validation options
|
|
1566
|
+
* @returns Promise resolving to StateValidationResult
|
|
1567
|
+
*/
|
|
785
1568
|
validateState(
|
|
786
1569
|
screenshotPath: string | string[],
|
|
787
1570
|
expectedState: T,
|
|
788
1571
|
options?: StateValidationOptions<T>
|
|
789
1572
|
): Promise<StateValidationResult<T>>;
|
|
1573
|
+
|
|
1574
|
+
/**
|
|
1575
|
+
* Build state validation prompt.
|
|
1576
|
+
*
|
|
1577
|
+
* @param expectedState - Expected state object
|
|
1578
|
+
* @param options - Validation options
|
|
1579
|
+
* @returns Validation prompt string
|
|
1580
|
+
*/
|
|
790
1581
|
buildStatePrompt(expectedState: T, options?: StateValidationOptions<T>): string;
|
|
791
1582
|
}
|
|
792
1583
|
|
|
@@ -820,22 +1611,90 @@ export interface AccessibilityResult extends ValidationResult {
|
|
|
820
1611
|
standards: string[];
|
|
821
1612
|
}
|
|
822
1613
|
|
|
1614
|
+
/**
|
|
1615
|
+
* Accessibility Validator
|
|
1616
|
+
*
|
|
1617
|
+
* Validates accessibility using VLLM semantic evaluation.
|
|
1618
|
+
* Checks contrast, labels, keyboard navigation, error messages, and WCAG compliance.
|
|
1619
|
+
*
|
|
1620
|
+
* **Use when:** You need comprehensive accessibility validation beyond programmatic checks.
|
|
1621
|
+
*
|
|
1622
|
+
* @example
|
|
1623
|
+
* ```typescript
|
|
1624
|
+
* const validator = new AccessibilityValidator({
|
|
1625
|
+
* minContrast: 4.5,
|
|
1626
|
+
* standards: ['WCAG-AA']
|
|
1627
|
+
* });
|
|
1628
|
+
*
|
|
1629
|
+
* const result = await validator.validateAccessibility(
|
|
1630
|
+
* 'payment-form.png',
|
|
1631
|
+
* {
|
|
1632
|
+
* testType: 'accessibility'
|
|
1633
|
+
* }
|
|
1634
|
+
* );
|
|
1635
|
+
*
|
|
1636
|
+
* console.log(result.passes); // true/false
|
|
1637
|
+
* console.log(result.violations.zeroTolerance); // Critical violations
|
|
1638
|
+
* ```
|
|
1639
|
+
*/
|
|
823
1640
|
export class AccessibilityValidator {
|
|
1641
|
+
/**
|
|
1642
|
+
* Create a new Accessibility Validator instance.
|
|
1643
|
+
*
|
|
1644
|
+
* @param options - Validator options (minContrast, standards, etc.)
|
|
1645
|
+
*/
|
|
824
1646
|
constructor(options?: AccessibilityValidatorOptions);
|
|
1647
|
+
|
|
1648
|
+
/**
|
|
1649
|
+
* Validate accessibility (static method).
|
|
1650
|
+
*
|
|
1651
|
+
* @param screenshotPath - Path to screenshot or array for comparison
|
|
1652
|
+
* @param options - Validation options
|
|
1653
|
+
* @returns Promise resolving to AccessibilityResult
|
|
1654
|
+
*/
|
|
825
1655
|
static validate(
|
|
826
1656
|
screenshotPath: string | string[],
|
|
827
1657
|
options?: AccessibilityOptions
|
|
828
1658
|
): Promise<AccessibilityResult>;
|
|
1659
|
+
|
|
1660
|
+
/**
|
|
1661
|
+
* Validate accessibility of screenshot.
|
|
1662
|
+
*
|
|
1663
|
+
* @param screenshotPath - Path to screenshot or array for comparison
|
|
1664
|
+
* @param options - Validation options
|
|
1665
|
+
* @returns Promise resolving to AccessibilityResult
|
|
1666
|
+
*/
|
|
829
1667
|
validateAccessibility(
|
|
830
1668
|
screenshotPath: string | string[],
|
|
831
1669
|
options?: AccessibilityOptions
|
|
832
1670
|
): Promise<AccessibilityResult>;
|
|
1671
|
+
|
|
1672
|
+
/**
|
|
1673
|
+
* Build accessibility validation prompt.
|
|
1674
|
+
*
|
|
1675
|
+
* @param options - Validation options
|
|
1676
|
+
* @returns Validation prompt string
|
|
1677
|
+
*/
|
|
833
1678
|
buildAccessibilityPrompt(options?: AccessibilityOptions): string;
|
|
1679
|
+
|
|
1680
|
+
/**
|
|
1681
|
+
* Detect accessibility violations from validation result.
|
|
1682
|
+
*
|
|
1683
|
+
* @param result - Validation result
|
|
1684
|
+
* @returns Categorized violations (zeroTolerance, critical, warnings)
|
|
1685
|
+
*/
|
|
834
1686
|
detectViolations(result: ValidationResult): {
|
|
835
1687
|
zeroTolerance: string[];
|
|
836
1688
|
critical: string[];
|
|
837
1689
|
warnings: string[];
|
|
838
1690
|
};
|
|
1691
|
+
|
|
1692
|
+
/**
|
|
1693
|
+
* Extract contrast information from validation result.
|
|
1694
|
+
*
|
|
1695
|
+
* @param result - Validation result
|
|
1696
|
+
* @returns Contrast ratios and compliance status
|
|
1697
|
+
*/
|
|
839
1698
|
extractContrastInfo(result: ValidationResult): {
|
|
840
1699
|
ratios: string[];
|
|
841
1700
|
minRatio: number | null;
|