@arclabs561/ai-visual-test 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.secretsignore.example +20 -0
- package/CHANGELOG.md +360 -0
- package/CONTRIBUTING.md +63 -0
- package/DEPLOYMENT.md +80 -0
- package/LICENSE +22 -0
- package/README.md +142 -0
- package/SECURITY.md +108 -0
- package/api/health.js +34 -0
- package/api/validate.js +252 -0
- package/index.d.ts +1221 -0
- package/package.json +112 -0
- package/public/index.html +149 -0
- package/src/batch-optimizer.mjs +451 -0
- package/src/bias-detector.mjs +370 -0
- package/src/bias-mitigation.mjs +233 -0
- package/src/cache.mjs +433 -0
- package/src/config.mjs +268 -0
- package/src/constants.mjs +80 -0
- package/src/context-compressor.mjs +350 -0
- package/src/convenience.mjs +617 -0
- package/src/cost-tracker.mjs +257 -0
- package/src/cross-modal-consistency.mjs +170 -0
- package/src/data-extractor.mjs +232 -0
- package/src/dynamic-few-shot.mjs +140 -0
- package/src/dynamic-prompts.mjs +361 -0
- package/src/ensemble/index.mjs +53 -0
- package/src/ensemble-judge.mjs +366 -0
- package/src/error-handler.mjs +67 -0
- package/src/errors.mjs +167 -0
- package/src/experience-propagation.mjs +128 -0
- package/src/experience-tracer.mjs +487 -0
- package/src/explanation-manager.mjs +299 -0
- package/src/feedback-aggregator.mjs +248 -0
- package/src/game-goal-prompts.mjs +478 -0
- package/src/game-player.mjs +548 -0
- package/src/hallucination-detector.mjs +155 -0
- package/src/helpers/playwright.mjs +80 -0
- package/src/human-validation-manager.mjs +516 -0
- package/src/index.mjs +364 -0
- package/src/judge.mjs +929 -0
- package/src/latency-aware-batch-optimizer.mjs +192 -0
- package/src/load-env.mjs +159 -0
- package/src/logger.mjs +55 -0
- package/src/metrics.mjs +187 -0
- package/src/model-tier-selector.mjs +221 -0
- package/src/multi-modal/index.mjs +36 -0
- package/src/multi-modal-fusion.mjs +190 -0
- package/src/multi-modal.mjs +524 -0
- package/src/natural-language-specs.mjs +1071 -0
- package/src/pair-comparison.mjs +277 -0
- package/src/persona/index.mjs +42 -0
- package/src/persona-enhanced.mjs +200 -0
- package/src/persona-experience.mjs +572 -0
- package/src/position-counterbalance.mjs +140 -0
- package/src/prompt-composer.mjs +375 -0
- package/src/render-change-detector.mjs +583 -0
- package/src/research-enhanced-validation.mjs +436 -0
- package/src/retry.mjs +152 -0
- package/src/rubrics.mjs +231 -0
- package/src/score-tracker.mjs +277 -0
- package/src/smart-validator.mjs +447 -0
- package/src/spec-config.mjs +106 -0
- package/src/spec-templates.mjs +347 -0
- package/src/specs/index.mjs +38 -0
- package/src/temporal/index.mjs +102 -0
- package/src/temporal-adaptive.mjs +163 -0
- package/src/temporal-batch-optimizer.mjs +222 -0
- package/src/temporal-constants.mjs +69 -0
- package/src/temporal-context.mjs +49 -0
- package/src/temporal-decision-manager.mjs +271 -0
- package/src/temporal-decision.mjs +669 -0
- package/src/temporal-errors.mjs +58 -0
- package/src/temporal-note-pruner.mjs +173 -0
- package/src/temporal-preprocessor.mjs +543 -0
- package/src/temporal-prompt-formatter.mjs +219 -0
- package/src/temporal-validation.mjs +159 -0
- package/src/temporal.mjs +415 -0
- package/src/type-guards.mjs +311 -0
- package/src/uncertainty-reducer.mjs +470 -0
- package/src/utils/index.mjs +175 -0
- package/src/validation-framework.mjs +321 -0
- package/src/validation-result-normalizer.mjs +64 -0
- package/src/validation.mjs +243 -0
- package/src/validators/accessibility-programmatic.mjs +345 -0
- package/src/validators/accessibility-validator.mjs +223 -0
- package/src/validators/batch-validator.mjs +143 -0
- package/src/validators/hybrid-validator.mjs +268 -0
- package/src/validators/index.mjs +34 -0
- package/src/validators/prompt-builder.mjs +218 -0
- package/src/validators/rubric.mjs +85 -0
- package/src/validators/state-programmatic.mjs +260 -0
- package/src/validators/state-validator.mjs +291 -0
- package/vercel.json +27 -0
package/index.d.ts
ADDED
|
@@ -0,0 +1,1221 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TypeScript definitions for ai-visual-test
|
|
3
|
+
*
|
|
4
|
+
* Provides type safety and IntelliSense support for the package.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
// Utility Types
|
|
8
|
+
/**
 * Makes the properties named by `K` optional and leaves every other
 * property of `T` as declared.
 *
 * @template T - Object type to transform.
 * @template K - Keys of `T` that become optional.
 */
export type PartialBy<T, K extends keyof T> = Omit<T, K> & Partial<Pick<T, K>>;
|
|
14
|
+
|
|
15
|
+
/**
 * Makes the properties named by `K` required. The result is the
 * intersection of `T` with a required view of the picked keys.
 *
 * @template T - Object type to transform.
 * @template K - Keys of `T` that become required.
 */
export type RequiredBy<T, K extends keyof T> = T & Required<Pick<T, K>>;
|
|
21
|
+
|
|
22
|
+
/**
 * Resolves to the return type of the function type `T`.
 *
 * Exported under the same name as TypeScript's built-in `ReturnType`
 * utility, so inside this module the name refers to this alias.
 *
 * @template T - Function type to inspect.
 */
export type ReturnType<T extends (...args: any[]) => any> =
  T extends (...args: any[]) => infer R ? R : never;
|
|
27
|
+
|
|
28
|
+
/**
 * Resolves to the parameter tuple of the function type `T`.
 *
 * Exported under the same name as TypeScript's built-in `Parameters`
 * utility, so inside this module the name refers to this alias.
 *
 * @template T - Function type to inspect.
 */
export type Parameters<T extends (...args: any[]) => any> =
  T extends (...args: infer P) => any ? P : never;
|
|
33
|
+
|
|
34
|
+
/**
 * Recursively marks every property of `T` as optional.
 *
 * Function types are returned unchanged. Without that guard a function
 * satisfies `T extends object`, and mapping over its (empty) key set
 * yields `{}`, which drops the call signature of callback members.
 * Non-object types are also returned unchanged.
 *
 * @template T - Type to transform.
 */
export type DeepPartial<T> = T extends (...args: any[]) => unknown
  ? T
  : T extends object
    ? { [P in keyof T]?: DeepPartial<T[P]> }
    : T;
|
|
39
|
+
|
|
40
|
+
/**
 * Recursively marks every property of `T` as required.
 *
 * Function types are returned unchanged. Without that guard a function
 * satisfies `T extends object`, and mapping over its (empty) key set
 * yields `{}`, which drops the call signature of callback members.
 * Non-object types are also returned unchanged.
 *
 * @template T - Type to transform.
 */
export type DeepRequired<T> = T extends (...args: any[]) => unknown
  ? T
  : T extends object
    ? { [P in keyof T]-?: DeepRequired<T[P]> }
    : T;
|
|
45
|
+
|
|
46
|
+
/**
 * Removes `null` and `undefined` from `T`.
 *
 * Exported under the same name as TypeScript's built-in `NonNullable`
 * utility, so inside this module the name refers to this alias.
 *
 * @template T - Type to filter.
 */
export type NonNullable<T> = T extends null | undefined ? never : T;
|
|
51
|
+
|
|
52
|
+
/**
 * Signature shared by validation callbacks: an image path, a prompt and an
 * optional context go in, and a promise of `T` comes out.
 *
 * @template T - Resolved result type; defaults to `ValidationResult`.
 */
export type ValidationFunction<T = ValidationResult> = (
  imagePath: string,
  prompt: string,
  context?: ValidationContext
) => Promise<T>;
|
|
61
|
+
|
|
62
|
+
// Error Types
|
|
63
|
+
/**
 * Base error class of the package. The other error classes declared in
 * this file extend it, directly or through `ValidationError`.
 */
export class AIBrowserTestError extends Error {
  /** Error code string. */
  code: string;
  /** Free-form detail payload attached to the error. */
  details: Record<string, unknown>;

  /**
   * @param message - Error message.
   * @param code - Error code string.
   * @param details - Optional detail payload.
   */
  constructor(message: string, code: string, details?: Record<string, unknown>);

  /** Returns a plain-object form of the error with the fields listed below. */
  toJSON(): {
    name: string;
    code: string;
    message: string;
    details: Record<string, unknown>;
    stack?: string;
  };
}
|
|
75
|
+
|
|
76
|
+
/**
 * Validation-category error. Unlike the base class, its constructor takes
 * no `code` argument.
 */
export class ValidationError extends AIBrowserTestError {
  /**
   * @param message - Error message.
   * @param details - Optional detail payload.
   */
  constructor(message: string, details?: Record<string, unknown>);
}
|
|
79
|
+
|
|
80
|
+
/**
 * Cache-category error. Unlike the base class, its constructor takes no
 * `code` argument.
 */
export class CacheError extends AIBrowserTestError {
  /**
   * @param message - Error message.
   * @param details - Optional detail payload.
   */
  constructor(message: string, details?: Record<string, unknown>);
}
|
|
83
|
+
|
|
84
|
+
/**
 * Configuration-category error. Unlike the base class, its constructor
 * takes no `code` argument.
 */
export class ConfigError extends AIBrowserTestError {
  /**
   * @param message - Error message.
   * @param details - Optional detail payload.
   */
  constructor(message: string, details?: Record<string, unknown>);
}
|
|
87
|
+
|
|
88
|
+
/** Provider-category error that also carries the provider name. */
export class ProviderError extends AIBrowserTestError {
  /** Name of the provider the error relates to. */
  provider: string;

  /**
   * @param message - Error message.
   * @param provider - Provider name.
   * @param details - Optional detail payload.
   */
  constructor(message: string, provider: string, details?: Record<string, unknown>);
}
|
|
92
|
+
|
|
93
|
+
/** Timeout-category error that also carries a numeric timeout value. */
export class TimeoutError extends AIBrowserTestError {
  /** Timeout value (unit not stated in this declaration file). */
  timeout: number;

  /**
   * @param message - Error message.
   * @param timeout - Timeout value.
   * @param details - Optional detail payload.
   */
  constructor(message: string, timeout: number, details?: Record<string, unknown>);
}
|
|
97
|
+
|
|
98
|
+
/** File-category error that also carries the path involved. */
export class FileError extends AIBrowserTestError {
  /** Path of the file the error relates to. */
  filePath: string;

  /**
   * @param message - Error message.
   * @param filePath - File path.
   * @param details - Optional detail payload.
   */
  constructor(message: string, filePath: string, details?: Record<string, unknown>);
}
|
|
102
|
+
|
|
103
|
+
/**
 * Validation error for a mismatch between extracted and expected state.
 * Note the constructor order: `message` is the last, optional argument.
 */
export class StateMismatchError extends ValidationError {
  /** Discrepancy descriptions. */
  discrepancies: string[];
  /** Extracted state value. */
  extracted: unknown;
  /** Expected state value. */
  expected: unknown;

  /**
   * @param discrepancies - Discrepancy descriptions.
   * @param extracted - Extracted state.
   * @param expected - Expected state.
   * @param message - Optional error message.
   */
  constructor(discrepancies: string[], extracted: unknown, expected: unknown, message?: string);
}
|
|
109
|
+
|
|
110
|
+
/** Type guard that narrows an unknown value to `AIBrowserTestError`. */
export function isAIBrowserTestError(error: unknown): error is AIBrowserTestError;

/**
 * Type guard that narrows an unknown value to the error type `T`.
 *
 * @param error - Value to test.
 * @param errorClass - Constructor of the error class to test against.
 */
export function isErrorType<T extends AIBrowserTestError>(
  error: unknown,
  errorClass: new (...args: any[]) => T
): error is T;
|
|
112
|
+
|
|
113
|
+
// Rubrics
|
|
114
|
+
/** Scoring rubric: a required `score` section and optional named dimensions. */
export interface Rubric {
  /** Overall score section; `criteria` maps string keys to descriptions. */
  score: {
    description: string;
    criteria: Record<string, string>;
  };
  /** Optional dimensions keyed by name, each with a list of criteria. */
  dimensions?: Record<string, {
    description: string;
    criteria: string[];
  }>;
}
|
|
124
|
+
|
|
125
|
+
/** Rubric constant exported by the package. */
export const DEFAULT_RUBRIC: Rubric;

/**
 * Produces prompt text for a rubric.
 *
 * @param rubric - Rubric to use; may be omitted or `null`.
 * @param includeDimensions - Optional flag; presumably controls whether
 *   `dimensions` are included (confirm against the implementation).
 */
export function buildRubricPrompt(rubric?: Rubric | null, includeDimensions?: boolean): string;

/**
 * Returns a rubric for the given test type.
 *
 * @param testType - Test type name.
 */
export function getRubricForTestType(testType: string): Rubric;
|
|
128
|
+
|
|
129
|
+
// Bias Detection
|
|
130
|
+
/** Result shape returned by `detectBias`. */
export interface BiasDetectionResult {
  /** Overall bias flag. */
  hasBias: boolean;
  /** One entry per bias type, each with its own flag, score and evidence. */
  biases: Array<{
    type: string;
    detected: boolean;
    score: number;
    evidence: Record<string, unknown>;
  }>;
  /** Severity bucket. */
  severity: 'none' | 'low' | 'medium' | 'high';
  /** Recommendation strings. */
  recommendations: string[];
}
|
|
141
|
+
|
|
142
|
+
/**
 * Analyses a judgment for bias and returns a `BiasDetectionResult`.
 *
 * @param judgment - Judgment as text or as an object.
 * @param options - Optional boolean switches, one per `check*` flag below.
 */
export function detectBias(
  judgment: string | object,
  options?: {
    checkVerbosity?: boolean;
    checkLength?: boolean;
    checkFormatting?: boolean;
    checkPosition?: boolean;
    checkAuthority?: boolean;
  }
): BiasDetectionResult;
|
|
149
|
+
|
|
150
|
+
/** Result shape returned by `detectPositionBias`; only `detected` is required. */
export interface PositionBiasResult {
  /** Whether position bias was detected. */
  detected: boolean;
  /** Optional flag for bias toward the first position. */
  firstBias?: boolean;
  /** Optional flag for bias toward the last position. */
  lastBias?: boolean;
  /** Optional explanation text. */
  reason?: string;
  /** Optional supporting score figures. */
  evidence?: {
    firstScore: number;
    lastScore: number;
    avgMiddle: number;
    allScores: number[];
  };
}
|
|
162
|
+
|
|
163
|
+
/**
 * Checks a list of judgments for position bias.
 *
 * @param judgments - Judgments, each with a `score` that may be `null`.
 */
export function detectPositionBias(judgments: Array<{ score: number | null }>): PositionBiasResult;
|
|
164
|
+
|
|
165
|
+
// Ensemble Judging
|
|
166
|
+
/** Options accepted by `EnsembleJudge` and `createEnsembleJudge`; all optional. */
export interface EnsembleJudgeOptions {
  /** Judge instances to use. */
  judges?: Array<VLLMJudge>;
  /** Name of the voting method. */
  votingMethod?: 'weighted_average' | 'majority' | 'consensus';
  /** Numeric weights; presumably one per judge (confirm against the implementation). */
  weights?: number[];
  /** Minimum agreement value. */
  minAgreement?: number;
  /** Toggles bias detection. */
  enableBiasDetection?: boolean;
}
|
|
173
|
+
|
|
174
|
+
/** Result shape resolved by `EnsembleJudge.evaluate`. */
export interface EnsembleResult {
  /** Combined score, or `null`. */
  score: number | null;
  assessment: string;
  issues: string[];
  reasoning: string;
  confidence: number;
  /** Agreement statistics across judges. */
  agreement: {
    score: number;
    scoreAgreement: number;
    assessmentAgreement: number;
    mean: number;
    stdDev: number;
    scores: number[];
  };
  /** Disagreement statistics across judges. */
  disagreement: {
    hasDisagreement: boolean;
    scoreRange: number;
    assessmentDisagreement: boolean;
    uniqueAssessments: string[];
    maxScore: number;
    minScore: number;
  };
  /** Optional bias-detection output: per-judge results plus a position check. */
  biasDetection?: {
    individual: BiasDetectionResult[];
    position: PositionBiasResult;
  };
  /** One entry per judge; `error` is optional. */
  individualJudgments: Array<{
    judgeIndex: number;
    score: number | null;
    assessment: string | null;
    issues: string[];
    reasoning: string | null;
    provider: string;
    error?: string;
  }>;
  judgeCount: number;
  /** Voting method name, typed as a plain string here. */
  votingMethod: string;
}
|
|
212
|
+
|
|
213
|
+
/** Judge that resolves an `EnsembleResult`. */
export class EnsembleJudge {
  /** @param options - Optional ensemble settings. */
  constructor(options?: EnsembleJudgeOptions);

  /**
   * Evaluates an image against a prompt.
   *
   * @param imagePath - Path of the image.
   * @param prompt - Prompt text.
   * @param context - Optional free-form context object.
   */
  evaluate(
    imagePath: string,
    prompt: string,
    context?: Record<string, unknown>
  ): Promise<EnsembleResult>;
}
|
|
217
|
+
|
|
218
|
+
/**
 * Factory that returns an `EnsembleJudge`.
 *
 * @param providers - Optional list of provider names.
 * @param options - Optional ensemble settings.
 */
export function createEnsembleJudge(providers?: string[], options?: EnsembleJudgeOptions): EnsembleJudge;
|
|
219
|
+
|
|
220
|
+
// Core Types
|
|
221
|
+
/** Optional context passed with a validation request; every field is optional. */
export interface ValidationContext {
  testType?: string;
  viewport?: { width: number; height: number };
  gameState?: Record<string, unknown>;
  useCache?: boolean;
  /** Timeout value (unit not stated in this declaration file). */
  timeout?: number;
  useRubric?: boolean;
  includeDimensions?: boolean;
  url?: string;
  description?: string;
  step?: string;
  /** Callback that receives the prompt and this context and returns a prompt string. */
  promptBuilder?: (prompt: string, context: ValidationContext) => string;
}
|
|
234
|
+
|
|
235
|
+
/** Cost estimate: token counts are numbers, cost amounts are strings. */
export interface EstimatedCost {
  inputTokens: number;
  outputTokens: number;
  inputCost: string;
  outputCost: string;
  totalCost: string;
  currency: string;
}
|
|
243
|
+
|
|
244
|
+
/** Shape returned by `extractSemanticInfo`. */
export interface SemanticInfo {
  /** Score, or `null`. */
  score: number | null;
  issues: string[];
  /** Assessment text, or `null`. */
  assessment: string | null;
  reasoning: string;
  brutalistViolations?: string[];
  zeroToleranceViolations?: string[];
}
|
|
252
|
+
|
|
253
|
+
/** Result of a screenshot validation. */
export interface ValidationResult {
  enabled: boolean;
  provider: string;
  /** Score, or `null`. */
  score: number | null;
  issues: string[];
  /** Assessment text, or `null`. */
  assessment: string | null;
  reasoning: string;
  estimatedCost?: EstimatedCost | null;
  /** Response time (unit not stated in this declaration file). */
  responseTime: number;
  cached?: boolean;
  judgment?: string;
  /** Raw payload, left untyped. */
  raw?: unknown;
  semantic?: SemanticInfo;
  error?: string;
  message?: string;
  pricing?: { input: number; output: number };
  timestamp?: string;
  testName?: string;
  viewport?: { width: number; height: number } | null;
}
|
|
273
|
+
|
|
274
|
+
/** Options accepted by `createConfig` and the `VLLMJudge` constructor; all optional. */
export interface ConfigOptions {
  /** Provider name, or `null`. */
  provider?: 'gemini' | 'openai' | 'claude' | null;
  apiKey?: string | null;
  /** Environment map, typed as Node's `ProcessEnv`. */
  env?: NodeJS.ProcessEnv;
  cacheDir?: string | null;
  cacheEnabled?: boolean;
  maxConcurrency?: number;
  /** Timeout value (unit not stated in this declaration file). */
  timeout?: number;
  verbose?: boolean;
}
|
|
284
|
+
|
|
285
|
+
/** Configuration object returned by `createConfig` and `getConfig`. */
export interface Config {
  provider: string;
  apiKey: string | null;
  /** Settings of the provider. */
  providerConfig: {
    name: string;
    apiUrl: string;
    model: string;
    freeTier: boolean;
    pricing: { input: number; output: number };
    priority: number;
  };
  enabled: boolean;
  /** Cache settings. */
  cache: {
    enabled: boolean;
    dir: string | null;
  };
  /** Concurrency and timeout settings. */
  performance: {
    maxConcurrency: number;
    timeout: number;
  };
  /** Debug settings. */
  debug: {
    verbose: boolean;
  };
}
|
|
309
|
+
|
|
310
|
+
// VLLMJudge Class
|
|
311
|
+
/** Judge class; its public fields mirror the matching entries of `Config`. */
export class VLLMJudge {
  /** @param options - Optional configuration overrides. */
  constructor(options?: ConfigOptions);

  provider: string;
  apiKey: string | null;
  providerConfig: Config['providerConfig'];
  enabled: boolean;

  /** Returns a string for the image at `imagePath` (Base64, judging by the method name). */
  imageToBase64(imagePath: string): string;

  /** Builds a prompt string from a prompt and a context. */
  buildPrompt(prompt: string, context: ValidationContext): string;

  /** Extracts a `SemanticInfo` from a judgment given as text or as an object. */
  extractSemanticInfo(judgment: string | object): SemanticInfo;

  /** Returns a cost estimate, or `null`. */
  estimateCost(data: unknown, provider: string): EstimatedCost | null;

  /**
   * Judges a screenshot.
   *
   * @param imagePath - Path of the screenshot.
   * @param prompt - Prompt text.
   * @param context - Optional validation context.
   */
  judgeScreenshot(
    imagePath: string,
    prompt: string,
    context?: ValidationContext
  ): Promise<ValidationResult>;
}
|
|
324
|
+
|
|
325
|
+
// Core Functions
|
|
326
|
+
/**
 * Validates a screenshot against a prompt. The signature matches
 * `ValidationFunction`.
 *
 * @param imagePath - Path of the screenshot.
 * @param prompt - Prompt text.
 * @param context - Optional validation context.
 */
export function validateScreenshot(
  imagePath: string,
  prompt: string,
  context?: ValidationContext
): Promise<ValidationResult>;
|
|
331
|
+
|
|
332
|
+
/**
 * Extracts a `SemanticInfo` from a judgment.
 *
 * @param judgment - Judgment as text or as an object.
 */
export function extractSemanticInfo(judgment: string | object): SemanticInfo;
|
|
333
|
+
|
|
334
|
+
// Multi-Modal Types
|
|
335
|
+
/** Rendered page data resolved by `extractRenderedCode`. */
export interface RenderedCode {
  html: string;
  /** Two-level string map; presumably selector → property → value (confirm against the implementation). */
  criticalCSS: Record<string, Record<string, string>>;
  /** Optional entries for three named page regions. */
  domStructure: {
    prideParade?: {
      computedTop: string;
      flagRowCount: number;
    };
    footer?: {
      computedBottom: string;
      hasStripe: boolean;
    };
    paymentCode?: {
      visible: boolean;
    };
  };
}
|
|
352
|
+
|
|
353
|
+
/** One captured screenshot with its timing figures. */
export interface TemporalScreenshot {
  path: string;
  timestamp: number;
  elapsed: number;
}
|
|
358
|
+
|
|
359
|
+
/** Persona description: a name, a perspective text and a list of focus items. */
export interface Persona {
  name: string;
  perspective: string;
  focus: string[];
}
|
|
364
|
+
|
|
365
|
+
/** Pairs a persona with the validation result for it. */
export interface PerspectiveEvaluation {
  persona: Persona;
  evaluation: ValidationResult;
}
|
|
369
|
+
|
|
370
|
+
// Multi-Modal Functions
|
|
371
|
+
/**
 * Resolves a `RenderedCode` for a page.
 *
 * @param page - Page object, typed `any` here; presumably a Playwright page (confirm).
 */
export function extractRenderedCode(page: any): Promise<RenderedCode>;
|
|
372
|
+
/**
 * Resolves a list of `TemporalScreenshot` entries for a page.
 *
 * @param page - Page object, typed `any` here; presumably a Playwright page (confirm).
 * @param fps - Optional frame rate.
 * @param duration - Optional duration (unit not stated in this declaration file).
 */
export function captureTemporalScreenshots(
  page: any,
  fps?: number,
  duration?: number
): Promise<TemporalScreenshot[]>;
|
|
377
|
+
/**
 * Resolves one `PerspectiveEvaluation` per persona for a screenshot.
 *
 * @param validateFn - Validation callback.
 * @param screenshotPath - Path of the screenshot.
 * @param renderedCode - Rendered page data.
 * @param gameState - Optional free-form state object.
 * @param personas - Optional persona list; may be `null`.
 */
export function multiPerspectiveEvaluation(
  validateFn: ValidationFunction,
  screenshotPath: string,
  renderedCode: RenderedCode,
  gameState?: Record<string, unknown>,
  personas?: Persona[] | null
): Promise<PerspectiveEvaluation[]>;
|
|
384
|
+
/**
 * Runs a validation for a page and resolves a combined result object.
 *
 * @param validateFn - Validation callback.
 * @param page - Page object, typed `any` here; presumably a Playwright page (confirm).
 * @param testName - Name of the test.
 * @param options - Optional capture settings.
 * @returns Object with the screenshot path, captured data, per-persona
 *   evaluations and aggregated score and issues, as typed below.
 */
export function multiModalValidation(
  validateFn: ValidationFunction,
  page: any,
  testName: string,
  options?: {
    fps?: number;
    duration?: number;
    captureCode?: boolean;
    captureState?: boolean;
    multiPerspective?: boolean;
  }
): Promise<{
  screenshotPath: string;
  renderedCode: RenderedCode | null;
  gameState: Record<string, unknown>;
  temporalScreenshots: TemporalScreenshot[];
  perspectives: PerspectiveEvaluation[];
  codeValidation: Record<string, boolean>;
  aggregatedScore: number | null;
  aggregatedIssues: string[];
  timestamp: number;
}>;
|
|
406
|
+
|
|
407
|
+
// Temporal Types
|
|
408
|
+
/** A single timed note; every field is optional. */
export interface TemporalNote {
  timestamp?: number;
  elapsed?: number;
  score?: number;
  observation?: string;
  step?: string;
}
|
|
415
|
+
|
|
416
|
+
/** One time window of notes with its score figures. */
export interface TemporalWindow {
  index: number;
  startTime: number;
  endTime: number;
  /** Notes in this window. */
  notes: TemporalNote[];
  weightedScore: number;
  totalWeight: number;
  avgScore: number;
  /** Observation strings, held in a `Set`. */
  observations: Set<string>;
}
|
|
426
|
+
|
|
427
|
+
/** Result shape returned by `aggregateTemporalNotes`. */
export interface AggregatedTemporalNotes {
  windows: TemporalWindow[];
  summary: string;
  /** Coherence figure. */
  coherence: number;
  /** Conflicts, each naming two windows by number (`window1`, `window2`). */
  conflicts: Array<{
    window1: number;
    window2: number;
    type: string;
    description: string;
  }>;
}
|
|
438
|
+
|
|
439
|
+
// Temporal Functions
|
|
440
|
+
/**
 * Aggregates temporal notes into an `AggregatedTemporalNotes`.
 *
 * @param notes - Notes to aggregate.
 * @param options - Optional numeric settings (`windowSize`, `decayFactor`,
 *   `coherenceThreshold`); units and defaults are not stated here.
 */
export function aggregateTemporalNotes(
  notes: TemporalNote[],
  options?: {
    windowSize?: number;
    decayFactor?: number;
    coherenceThreshold?: number;
  }
): AggregatedTemporalNotes;
|
|
448
|
+
|
|
449
|
+
/**
 * Formats aggregated notes as a string.
 *
 * @param aggregated - Aggregated notes to format.
 */
export function formatNotesForPrompt(aggregated: AggregatedTemporalNotes): string;

/**
 * Computes a numeric coherence value for a list of windows.
 *
 * @param windows - Windows to evaluate.
 */
export function calculateCoherence(windows: TemporalWindow[]): number;
|
|
452
|
+
|
|
453
|
+
// Cache Types
|
|
454
|
+
/** Cache counters returned by `getCacheStats`. */
export interface CacheStats {
  hits: number;
  misses: number;
  size: number;
  hitRate: number;
}
|
|
460
|
+
|
|
461
|
+
// Cache Functions
|
|
462
|
+
/**
 * Initialises the cache.
 *
 * @param cacheDir - Optional cache directory.
 */
export function initCache(cacheDir?: string): void;

/** Returns a cache key string for an image path, a prompt and an optional context. */
export function generateCacheKey(
  imagePath: string,
  prompt: string,
  context?: ValidationContext
): string;

/** Returns a cached `ValidationResult`, or `null`. */
export function getCached(
  imagePath: string,
  prompt: string,
  context?: ValidationContext
): ValidationResult | null;

/**
 * Stores a result in the cache. Unlike `getCached`, `context` is a
 * required parameter here.
 */
export function setCached(
  imagePath: string,
  prompt: string,
  context: ValidationContext,
  result: ValidationResult
): void;

/** Clears the cache. */
export function clearCache(): void;

/** Returns the cache counters. */
export function getCacheStats(): CacheStats;
|
|
473
|
+
|
|
474
|
+
// Config Functions
|
|
475
|
+
/**
 * Creates a `Config`.
 *
 * @param options - Optional overrides.
 */
export function createConfig(options?: ConfigOptions): Config;

/** Returns a `Config`. */
export function getConfig(): Config;

/** Accepts a `Config` object. */
export function setConfig(config: Config): void;

/**
 * Returns a provider configuration.
 *
 * @param providerName - Optional provider name; may be `null`.
 */
export function getProvider(providerName?: string | null): Config['providerConfig'];
|
|
479
|
+
|
|
480
|
+
// Utility Functions
|
|
481
|
+
/**
 * Loads environment settings.
 *
 * @param basePath - Optional base path; may be `null`.
 */
export function loadEnv(basePath?: string | null): void;

/** Initialises the package's error handlers. */
export function initErrorHandlers(): void;
|
|
483
|
+
|
|
484
|
+
// ScoreTracker Class
|
|
485
|
+
/** Records per-test scores and compares them with baselines. */
export class ScoreTracker {
  /** @param options - Optional settings (`baselineDir`, `autoSave`). */
  constructor(options?: { baselineDir?: string; autoSave?: boolean });

  /**
   * Records a score for a test and returns the recorded entry.
   *
   * @param testName - Name of the test.
   * @param score - Score to record.
   * @param metadata - Optional free-form metadata.
   */
  record(
    testName: string,
    score: number,
    metadata?: Record<string, unknown>
  ): { score: number; timestamp: string; metadata: Record<string, unknown> };

  /** Returns the baseline for a test, or `null`. */
  getBaseline(testName: string): number | null;

  /** Returns the current score for a test, or `null`. */
  getCurrent(testName: string): number | null;

  /** Compares a score with the baseline of a test; may return `null`. */
  compare(
    testName: string,
    currentScore: number
  ): {
    hasBaseline: boolean;
    baseline: number | null;
    current: number;
    improved: boolean;
    delta: number;
    percentage: number;
    regression?: boolean;
    trend?: string;
    history?: Array<{ score: number; timestamp: string; metadata?: Record<string, unknown> }>;
  } | null;

  /**
   * Updates the baseline of a test.
   *
   * @param testName - Name of the test.
   * @param newBaseline - Optional new baseline; may be `null`.
   */
  updateBaseline(testName: string, newBaseline?: number | null): boolean;

  /** Returns the tracked data as a record of string keys to entries. */
  getAll(): Record<string, {
    history: Array<{ score: number; timestamp: string; metadata?: Record<string, unknown> }>;
    current: number | null;
    baseline: number | null;
    firstRecorded: string;
    lastUpdated: string;
    baselineSetAt?: string;
  }>;

  /** Returns statistics with the fields typed below. */
  getStats(): {
    current: number | null;
    baseline: number | null;
    // NOTE(review): `timestamp` was declared `number` here while `record`,
    // `compare` and `getAll` declare the same history entry with a string
    // timestamp; aligned to `string`. Confirm against the implementation.
    history: Array<{ score: number; timestamp: string; metadata?: Record<string, unknown> }>;
    average: number | null;
    min: number | null;
    max: number | null;
    totalTests?: number;
    testsWithBaselines?: number;
    testsWithRegressions?: number;
    testsWithImprovements?: number;
    averageScore?: number;
    averageBaseline?: number;
  };
}
|
|
508
|
+
|
|
509
|
+
// BatchOptimizer Class
|
|
510
|
+
/** Batch validator with its own cache controls. */
export class BatchOptimizer {
  /** @param options - Optional settings (`maxConcurrency`, `batchSize`, `cacheEnabled`). */
  constructor(options?: { maxConcurrency?: number; batchSize?: number; cacheEnabled?: boolean });

  /**
   * Validates one or more images against a prompt.
   *
   * @param imagePaths - A single path or a list of paths.
   * @param prompt - Prompt text.
   * @param context - Optional validation context.
   */
  batchValidate(
    imagePaths: string | string[],
    prompt: string,
    context?: ValidationContext
  ): Promise<ValidationResult[]>;

  /** Clears the cache. */
  clearCache(): void;

  /** Returns cache size, queue length and active request count. */
  getCacheStats(): { cacheSize: number; queueLength: number; activeRequests: number };
}
|
|
516
|
+
|
|
517
|
+
// Data Extractor
|
|
518
|
+
/**
 * Extracts structured data from text. The resolved value is typed
 * `unknown`, so callers must narrow it before use.
 *
 * @param text - Source text.
 * @param schema - Schema object.
 * @param options - Optional extraction method, provider name and API key.
 */
export function extractStructuredData(
  text: string,
  schema: object,
  options?: {
    method?: 'json' | 'llm' | 'regex';
    provider?: string;
    apiKey?: string;
  }
): Promise<unknown>;
|
|
527
|
+
|
|
528
|
+
// Feedback Aggregator
|
|
529
|
+
/** Result shape returned by `aggregateFeedback`. */
export interface AggregatedFeedback {
  averageScore: number;
  totalIssues: number;
  /** Issue strings paired with a count. */
  commonIssues: Array<{ issue: string; count: number }>;
  /** String keys mapped to numbers. */
  scoreDistribution: Record<string, number>;
  recommendations: string[];
}
|
|
536
|
+
|
|
537
|
+
/**
 * Aggregates validation results into an `AggregatedFeedback`.
 *
 * @param judgeResults - Results to aggregate.
 */
export function aggregateFeedback(judgeResults: ValidationResult[]): AggregatedFeedback;

/**
 * Returns recommendation strings for aggregated feedback.
 *
 * @param aggregated - Aggregated feedback.
 */
export function generateRecommendations(aggregated: AggregatedFeedback): string[];
|
|
539
|
+
|
|
540
|
+
// Context Compressor
|
|
541
|
+
/**
 * Compresses a list of temporal notes and returns a list of notes.
 *
 * @param notes - Notes to compress.
 * @param options - Optional settings (`maxLength`, `preserveImportant`).
 */
export function compressContext(
  notes: TemporalNote[],
  options?: {
    maxLength?: number;
    preserveImportant?: boolean;
  }
): TemporalNote[];
|
|
548
|
+
|
|
549
|
+
/**
 * Compresses a state history and returns a list of state objects.
 *
 * @param stateHistory - State objects to compress.
 * @param options - Optional settings (`maxLength`, `preserveImportant`).
 */
export function compressStateHistory(
  stateHistory: Array<Record<string, unknown>>,
  options?: {
    maxLength?: number;
    preserveImportant?: boolean;
  }
): Array<Record<string, unknown>>;
|
|
556
|
+
|
|
557
|
+
// Persona Experience
|
|
558
|
+
/** Options for the persona experience functions; every field is optional. */
export interface PersonaExperienceOptions {
  viewport?: { width: number; height: number };
  device?: string;
  darkMode?: boolean;
  /** Time scale name. */
  timeScale?: 'human' | 'mechanical';
  captureScreenshots?: boolean;
  captureState?: boolean;
  captureCode?: boolean;
  /** Notes passed in by the caller. */
  notes?: TemporalNote[];
}
|
|
568
|
+
|
|
569
|
+
/** Result of experiencing a page as one persona. */
export interface PersonaExperienceResult {
  persona: Persona;
  notes: TemporalNote[];
  screenshots: TemporalScreenshot[];
  renderedCode?: RenderedCode;
  gameState?: Record<string, unknown>;
  evaluation?: ValidationResult;
  timestamp: number;
}
|
|
578
|
+
|
|
579
|
+
/**
 * Experiences a page as a single persona.
 *
 * @param page - Page object, typed `any` here; presumably a Playwright page (confirm).
 * @param persona - Persona to use.
 * @param options - Optional experience settings.
 */
export function experiencePageAsPersona(
  page: any,
  persona: Persona,
  options?: PersonaExperienceOptions
): Promise<PersonaExperienceResult>;
|
|
584
|
+
|
|
585
|
+
/**
 * Experiences a page with several personas and resolves a list of results.
 *
 * @param page - Page object, typed `any` here; presumably a Playwright page (confirm).
 * @param personas - Personas to use.
 * @param options - Optional experience settings.
 */
export function experiencePageWithPersonas(
  page: any,
  personas: Persona[],
  options?: PersonaExperienceOptions
): Promise<PersonaExperienceResult[]>;
|
|
590
|
+
|
|
591
|
+
// Type Guards
|
|
592
|
+
/**
 * Type guard for `Record<string, T>`.
 * NOTE(review): `T` is chosen by the caller; nothing in this signature
 * shows that the value type is checked at runtime.
 */
export function isObject<T>(value: unknown): value is Record<string, T>;

/** Type guard for `string`. */
export function isString(value: unknown): value is string;

/** Type guard for `number`. */
export function isNumber(value: unknown): value is number;

/** Type guard that narrows to `number`; the name indicates a positive-integer check. */
export function isPositiveInteger(value: unknown): value is number;

/** Type guard that narrows to `string`; the name indicates a non-empty check. */
export function isNonEmptyString(value: unknown): value is string;

/**
 * Type guard for `T[]`.
 * NOTE(review): `T` is chosen by the caller; nothing in this signature
 * shows that the element type is checked at runtime.
 */
export function isArray<T>(value: unknown): value is T[];

/** Type guard for `Function`. */
export function isFunction(value: unknown): value is Function;

/** Type guard for `Promise<T>`. */
export function isPromise<T>(value: unknown): value is Promise<T>;

/** Type guard for `ValidationResult`. */
export function isValidationResult(value: unknown): value is ValidationResult;

/** Type guard for `ValidationContext`. */
export function isValidationContext(value: unknown): value is ValidationContext;

/** Type guard for `Persona`. */
export function isPersona(value: unknown): value is Persona;

/** Type guard for `TemporalNote`. */
export function isTemporalNote(value: unknown): value is TemporalNote;
|
|
604
|
+
|
|
605
|
+
// Type Assertions
|
|
606
|
+
/**
 * Assertion function for `Record<string, T>`.
 *
 * @param value - Value to check.
 * @param name - Optional label; presumably used in the error message (confirm).
 */
export function assertObject<T>(value: unknown, name?: string): asserts value is Record<string, T>;

/** Assertion function for `string`; `name` is an optional label. */
export function assertString(value: unknown, name?: string): asserts value is string;

/** Assertion function that narrows to `string`; the name indicates a non-empty check. */
export function assertNonEmptyString(value: unknown, name?: string): asserts value is string;

/** Assertion function for `number`; `name` is an optional label. */
export function assertNumber(value: unknown, name?: string): asserts value is number;

/** Assertion function for `T[]`; `name` is an optional label. */
export function assertArray<T>(value: unknown, name?: string): asserts value is T[];

/** Assertion function for `Function`; `name` is an optional label. */
export function assertFunction(value: unknown, name?: string): asserts value is Function;
|
|
612
|
+
|
|
613
|
+
// Utility Functions
|
|
614
|
+
/**
 * Returns an object typed as the `keys` subset of `obj`.
 *
 * @param obj - Source object.
 * @param keys - Keys to keep.
 */
export function pick<T, K extends keyof T>(obj: T, keys: K[]): Pick<T, K>;

/**
 * Reads a property by string key and returns either a property value of
 * `obj` or the default.
 *
 * @param obj - Source object.
 * @param key - Property name.
 * @param defaultValue - Fallback value.
 */
export function getProperty<T, D>(obj: T, key: string, defaultValue: D): T[keyof T] | D;
|
|
616
|
+
|
|
617
|
+
// Experience Tracer
|
|
618
|
+
/** Trace of one session: events, validations, screenshots and state snapshots. */
export class ExperienceTrace {
  /**
   * @param sessionId - Session identifier.
   * @param persona - Optional persona; may be `null`.
   */
  constructor(sessionId: string, persona?: Persona | null);

  sessionId: string;
  persona: Persona | null;
  startTime: number;
  events: Array<Record<string, unknown>>;
  validations: Array<Record<string, unknown>>;
  screenshots: Array<Record<string, unknown>>;
  stateHistory: Array<Record<string, unknown>>;
  /** Aggregated notes, or `null`. */
  aggregatedNotes: AggregatedTemporalNotes | null;
  /** Meta-evaluation data, or `null`. */
  metaEvaluation: Record<string, unknown> | null;

  /** Adds an event and returns a record object; `timestamp` is optional and may be `null`. */
  addEvent(
    type: string,
    data: Record<string, unknown>,
    timestamp?: number | null
  ): Record<string, unknown>;

  /** Adds a validation result, with optional context, and returns a record object. */
  addValidation(validation: ValidationResult, context?: Record<string, unknown>): Record<string, unknown>;

  /** Adds a screenshot path for a step and returns a record object. */
  addScreenshot(path: string, step: string, metadata?: Record<string, unknown>): Record<string, unknown>;

  /** Adds a state snapshot, with an optional label, and returns a record object. */
  addStateSnapshot(state: Record<string, unknown>, label?: string): Record<string, unknown>;

  /**
   * Aggregates notes with a caller-supplied aggregation function.
   *
   * @param aggregateTemporalNotes - Aggregation callback.
   * @param options - Optional free-form options.
   */
  aggregateNotes(
    aggregateTemporalNotes: (
      notes: TemporalNote[],
      options?: Record<string, unknown>
    ) => AggregatedTemporalNotes,
    options?: Record<string, unknown>
  ): AggregatedTemporalNotes;

  /** Returns a summary object. */
  getSummary(): Record<string, unknown>;

  /** Returns the full trace as an object. */
  getFullTrace(): Record<string, unknown>;

  /** Exports to a JSON file at `filePath`. */
  exportToJSON(filePath: string): Promise<void>;
}
|
|
642
|
+
|
|
643
|
+
/** Creates and looks up `ExperienceTrace` objects by session id. */
export class ExperienceTracerManager {
  constructor();

  /** Creates a trace for a session, with an optional persona. */
  createTrace(sessionId: string, persona?: Persona | null): ExperienceTrace;

  /** Returns the trace for a session, or `null`. */
  getTrace(sessionId: string): ExperienceTrace | null;

  /** Returns a list of traces. */
  getAllTraces(): ExperienceTrace[];

  /**
   * Runs a meta-evaluation for a session.
   *
   * @param sessionId - Session identifier.
   * @param validateScreenshot - Validation callback.
   */
  metaEvaluateTrace(
    sessionId: string,
    validateScreenshot: ValidationFunction
  ): Promise<Record<string, unknown>>;

  /** Returns a summary of meta-evaluations. */
  getMetaEvaluationSummary(): {
    totalEvaluations: number;
    averageQuality: number | null;
    evaluations?: Array<Record<string, unknown>>;
  };
}
|
|
658
|
+
|
|
659
|
+
/** Returns an `ExperienceTracerManager`. */
export function getTracerManager(): ExperienceTracerManager;
|
|
660
|
+
|
|
661
|
+
// Position Counter-Balance
|
|
662
|
+
/** Options for `evaluateWithCounterBalance`; every field is optional. */
export interface CounterBalanceOptions {
  enabled?: boolean;
  /** Baseline path, or `null`. */
  baselinePath?: string | null;
  /** Context order name. */
  contextOrder?: 'original' | 'reversed';
}
|
|
667
|
+
|
|
668
|
+
/** `ValidationResult` extended with counter-balancing fields. */
export interface CounterBalancedResult extends ValidationResult {
  counterBalanced: boolean;
  originalScore: number | null;
  reversedScore: number | null;
  scoreDifference: number | null;
  /** Holds the original and reversed results plus a position-bias flag. */
  metadata: {
    counterBalancing: {
      enabled: boolean;
      originalResult: ValidationResult;
      reversedResult: ValidationResult;
      positionBiasDetected: boolean;
    };
  };
}
|
|
682
|
+
|
|
683
|
+
/**
 * Runs an evaluation with counter-balancing.
 *
 * @param evaluateFn - Validation callback.
 * @param imagePath - Path of the image.
 * @param prompt - Prompt text.
 * @param context - Optional validation context.
 * @param options - Optional counter-balance settings.
 */
export function evaluateWithCounterBalance(
  evaluateFn: ValidationFunction<ValidationResult>,
  imagePath: string,
  prompt: string,
  context?: ValidationContext,
  options?: CounterBalanceOptions
): Promise<CounterBalancedResult>;
|
|
690
|
+
|
|
691
|
+
/**
 * Returns a boolean decision for the given context.
 *
 * @param context - Validation context to inspect.
 */
export function shouldUseCounterBalance(context: ValidationContext): boolean;
|
|
692
|
+
|
|
693
|
+
// Dynamic Few-Shot Examples
|
|
694
|
+
/** One few-shot example; every field is optional. */
export interface FewShotExample {
  description?: string;
  evaluation?: string;
  score?: number | null;
  screenshot?: string;
  quality?: string;
  /** Nested result with its own optional score and reasoning. */
  result?: {
    score?: number | null;
    reasoning?: string;
  };
  /** Payload left untyped. */
  json?: unknown;
}
|
|
706
|
+
|
|
707
|
+
/** Options for `selectFewShotExamples`; every field is optional. */
export interface FewShotOptions {
  maxExamples?: number;
  similarityThreshold?: number;
  useSemanticMatching?: boolean;
}
|
|
712
|
+
|
|
713
|
+
/**
 * Returns a list of few-shot examples for a prompt.
 *
 * @param prompt - Prompt the examples are selected for
 * @param examples - Optional candidate examples
 * @param options - Optional selection settings
 * @returns An array of `FewShotExample`
 */
export function selectFewShotExamples(
  prompt: string,
  examples?: FewShotExample[],
  options?: FewShotOptions,
): FewShotExample[];
|
|
718
|
+
|
|
719
|
+
/**
 * Renders few-shot examples to a string.
 *
 * @param examples - Examples to render
 * @param format - Optional output format, `'default'` or `'json'`
 */
export function formatFewShotExamples(examples: FewShotExample[], format?: 'default' | 'json'): string;
|
|
723
|
+
|
|
724
|
+
// Metrics
|
|
725
|
+
/**
 * Spearman correlation of two series whose entries may be `null`.
 *
 * @returns The coefficient, or `null`.
 * NOTE(review): the conditions that yield `null` are not visible here — confirm.
 */
export function spearmanCorrelation(x: (number | null)[], y: (number | null)[]): number | null;
|
|
729
|
+
|
|
730
|
+
/**
 * Pearson correlation of two series whose entries may be `null`.
 *
 * @returns The coefficient, or `null`.
 * NOTE(review): the conditions that yield `null` are not visible here — confirm.
 */
export function pearsonCorrelation(x: (number | null)[], y: (number | null)[]): number | null;
|
|
734
|
+
|
|
735
|
+
/**
 * Return shape of `calculateRankAgreement`: three nullable correlation
 * values plus match counts.
 */
export interface RankAgreementResult {
  spearman: number | null;
  pearson: number | null;
  kendall: number | null;
  exactMatches: number;
  totalItems: number;
  /** NOTE(review): presumably exactMatches / totalItems — confirm. */
  agreementRate: number;
}
|
|
743
|
+
|
|
744
|
+
/**
 * Compares two rankings (entries may be `null`) and returns a
 * `RankAgreementResult`.
 */
export function calculateRankAgreement(
  ranking1: (number | null)[],
  ranking2: (number | null)[],
): RankAgreementResult;
|
|
748
|
+
|
|
749
|
+
// Validators
|
|
750
|
+
/**
 * Constructor options for `StateValidator`.
 *
 * @typeParam T - Shape of the expected state
 */
export interface StateValidatorOptions<T = unknown> {
  tolerance?: number;
  /** Validation function to use for screenshots. */
  validateScreenshot?: ValidationFunction;
  /** Derives a partial state from a validation result and the expected state. */
  stateExtractor?: (result: ValidationResult, expected: T) => Partial<T>;
  /** Compares extracted and expected state; receives the tolerance in its options argument. */
  stateComparator?: (
    extracted: Partial<T>,
    expected: T,
    options: { tolerance: number },
  ) => {
    matches: boolean;
    discrepancies: string[];
  };
}
|
|
759
|
+
|
|
760
|
+
/**
 * Per-call options for `StateValidator.validate`, `validateState`
 * and `buildStatePrompt`.
 *
 * @typeParam T - Shape of the expected state
 */
export interface StateValidationOptions<T = unknown> {
  /** Custom prompt factory; receives the expected state and an options record. */
  promptBuilder?: (expected: T, options: Record<string, unknown>) => string;
  testType?: string;
  context?: Record<string, unknown>;
  stateDescription?: string;
  extractionTasks?: string[];
}
|
|
767
|
+
|
|
768
|
+
/**
 * Result of a `StateValidator` run: a `ValidationResult` plus the extracted
 * and expected state and the comparison outcome.
 *
 * @typeParam T - Shape of the expected state
 */
export interface StateValidationResult<T = unknown> extends ValidationResult {
  extractedState: Partial<T>;
  expectedState: T;
  validation: {
    matches: boolean;
    discrepancies: string[];
  };
  /** NOTE(review): presumably mirrors `validation.matches` — confirm. */
  matches: boolean;
}
|
|
777
|
+
|
|
778
|
+
/**
 * Validator that checks one or more screenshots against an expected state.
 *
 * @typeParam T - Shape of the expected state
 */
export class StateValidator<T = unknown> {
  constructor(options?: StateValidatorOptions<T>);

  /** Static entry point; takes the same arguments as `validateState`. */
  static validate<T = unknown>(
    screenshotPath: string | string[],
    expectedState: T,
    options?: StateValidationOptions<T>,
  ): Promise<StateValidationResult<T>>;

  /**
   * @param screenshotPath - A single path or a list of paths
   * @param expectedState - State to validate against
   * @param options - Optional per-call settings
   */
  validateState(
    screenshotPath: string | string[],
    expectedState: T,
    options?: StateValidationOptions<T>,
  ): Promise<StateValidationResult<T>>;

  /** Builds the prompt string for the given expected state. */
  buildStatePrompt(expectedState: T, options?: StateValidationOptions<T>): string;
}
|
|
792
|
+
|
|
793
|
+
/**
 * Constructor options for `AccessibilityValidator`.
 * NOTE(review): defaults are not visible from the declaration — confirm.
 */
export interface AccessibilityValidatorOptions {
  minContrast?: number;
  standards?: string[];
  zeroTolerance?: boolean;
  /** Validation function to use for screenshots. */
  validateScreenshot?: ValidationFunction;
}
|
|
799
|
+
|
|
800
|
+
/**
 * Per-call options for `AccessibilityValidator`. The index signature allows
 * arbitrary extra keys in addition to the named ones.
 */
export interface AccessibilityOptions {
  customPrompt?: string;
  minContrast?: number;
  standards?: string[];
  testType?: string;
  [key: string]: unknown;
}
|
|
807
|
+
|
|
808
|
+
/**
 * Result of an accessibility validation: a `ValidationResult` plus
 * categorised violations and contrast information.
 */
export interface AccessibilityResult extends ValidationResult {
  /** Violations grouped into three categories. */
  violations: {
    zeroTolerance: string[];
    critical: string[];
    warnings: string[];
  };
  passes: boolean;
  /** Contrast findings; `minRatio` and `meetsRequirement` may be `null`. */
  contrastCheck: {
    ratios: string[];
    minRatio: number | null;
    meetsRequirement: boolean | null;
  };
  standards: string[];
}
|
|
822
|
+
|
|
823
|
+
/**
 * Validator that evaluates one or more screenshots for accessibility.
 */
export class AccessibilityValidator {
  constructor(options?: AccessibilityValidatorOptions);

  /** Static entry point; takes the same arguments as `validateAccessibility`. */
  static validate(
    screenshotPath: string | string[],
    options?: AccessibilityOptions,
  ): Promise<AccessibilityResult>;

  /**
   * @param screenshotPath - A single path or a list of paths
   * @param options - Optional per-call settings
   */
  validateAccessibility(
    screenshotPath: string | string[],
    options?: AccessibilityOptions,
  ): Promise<AccessibilityResult>;

  /** Builds the accessibility prompt string. */
  buildAccessibilityPrompt(options?: AccessibilityOptions): string;

  /** Derives categorised violations (same shape as `AccessibilityResult.violations`) from a result. */
  detectViolations(result: ValidationResult): AccessibilityResult['violations'];

  /** Derives contrast information (same shape as `AccessibilityResult.contrastCheck`) from a result. */
  extractContrastInfo(result: ValidationResult): AccessibilityResult['contrastCheck'];
}
|
|
845
|
+
|
|
846
|
+
/**
 * Function form of a prompt template: takes a variables record and an
 * optional context record and returns the prompt string. Accepted, alongside
 * plain strings, by `PromptBuilderOptions.templates` and
 * `PromptBuilder.registerTemplate`.
 */
export type PromptTemplate = (variables: Record<string, unknown>, context?: Record<string, unknown>) => string;
|
|
847
|
+
|
|
848
|
+
/**
 * Constructor options for `PromptBuilder`.
 */
export interface PromptBuilderOptions {
  /** Named templates, each either a template function or a plain string. */
  templates?: Record<string, PromptTemplate | string>;
  rubric?: Rubric;
  defaultContext?: Record<string, unknown>;
}
|
|
853
|
+
|
|
854
|
+
/**
 * Per-call options for `PromptBuilder.buildPrompt` and `buildFromTemplate`.
 * NOTE(review): defaults for the boolean flags are not visible here — confirm.
 */
export interface PromptOptions {
  variables?: Record<string, unknown>;
  context?: Record<string, unknown>;
  includeRubric?: boolean;
  includeZeroTolerance?: boolean;
  includeScoring?: boolean;
  enforceZeroTolerance?: boolean;
  rubric?: Rubric;
}
|
|
863
|
+
|
|
864
|
+
/**
 * Builds prompt strings from a base prompt or from a registered template.
 */
export class PromptBuilder {
  constructor(options?: PromptBuilderOptions);

  /** Produces a prompt string from a base prompt and optional settings. */
  buildPrompt(basePrompt: string, options?: PromptOptions): string;

  /** Produces a prompt string from the template registered under `templateName`. */
  buildFromTemplate(
    templateName: string,
    variables?: Record<string, unknown>,
    options?: PromptOptions,
  ): string;

  /** Registers a template (function or plain string) under `name`. */
  registerTemplate(name: string, template: PromptTemplate | string): void;
}
|
|
870
|
+
|
|
871
|
+
/**
 * Optional flags accepted by `validateWithRubric`.
 * NOTE(review): defaults are not visible from the declaration — confirm.
 */
export interface RubricOptions {
  enforceZeroTolerance?: boolean;
  includeZeroTolerance?: boolean;
  includeScoring?: boolean;
}
|
|
876
|
+
|
|
877
|
+
/**
 * One criterion of an `ExtendedRubric`. Only `id` and `rule` are required.
 */
export interface RubricCriterion {
  id: string;
  rule: string;
  weight?: number;
  zeroTolerance?: boolean;
  penalty?: number;
  description?: string;
}
|
|
885
|
+
|
|
886
|
+
/**
 * A `Rubric` with an optional list of criteria and optional name/description.
 * This is the rubric type taken by `validateWithRubric`.
 */
export interface ExtendedRubric extends Rubric {
  criteria?: RubricCriterion[];
  name?: string;
  description?: string;
}
|
|
891
|
+
|
|
892
|
+
/**
 * Validates a screenshot against a rubric.
 *
 * @param screenshotPath - Path to the screenshot
 * @param prompt - Evaluation prompt
 * @param rubric - Rubric to apply
 * @param context - Optional validation context
 * @param options - Optional rubric flags
 * @returns A `ValidationResult` that may additionally carry `zeroToleranceViolation`
 */
export function validateWithRubric(
  screenshotPath: string,
  prompt: string,
  rubric: ExtendedRubric,
  context?: ValidationContext,
  options?: RubricOptions,
): Promise<ValidationResult & { zeroToleranceViolation?: boolean }>;
|
|
899
|
+
|
|
900
|
+
/**
 * Constructor options for `BatchValidator`.
 * NOTE(review): defaults are not visible from the declaration — confirm.
 */
export interface BatchValidatorOptions {
  maxConcurrency?: number;
  batchSize?: number;
  cacheEnabled?: boolean;
  trackCosts?: boolean;
  trackStats?: boolean;
}
|
|
907
|
+
|
|
908
|
+
/**
 * Aggregate statistics attached to a `BatchValidationResult`.
 */
export interface BatchValidationStats {
  total: number;
  passed: number;
  failed: number;
  /** NOTE(review): unit (presumably milliseconds) is not visible here — confirm. */
  duration: number;
  /** Whatever `CostTracker.getStats()` returns, or `null`. */
  costStats: ReturnType<CostTracker['getStats']> | null;
  /** Request timing summary, or `null`. */
  performance: {
    totalRequests: number;
    avgDuration: number;
    minDuration: number;
    maxDuration: number;
    successRate: number;
  } | null;
}
|
|
922
|
+
|
|
923
|
+
/**
 * What `BatchValidator.batchValidate` resolves to: the individual results
 * plus aggregate stats (which may be `null`).
 */
export interface BatchValidationResult {
  results: ValidationResult[];
  stats: BatchValidationStats | null;
}
|
|
927
|
+
|
|
928
|
+
/**
 * `BatchOptimizer` subclass that validates screenshots in batch and exposes
 * cost and performance statistics.
 */
export class BatchValidator extends BatchOptimizer {
  constructor(options?: BatchValidatorOptions);

  /**
   * @param screenshots - A single path or a list of paths
   * @param prompt - Evaluation prompt
   * @param context - Optional validation context
   */
  batchValidate(
    screenshots: string | string[],
    prompt: string,
    context?: ValidationContext,
  ): Promise<BatchValidationResult>;

  /** Same type as `CostTracker.getStats()` returns. */
  getCostStats(): ReturnType<CostTracker['getStats']>;

  /** Same shape as the non-null form of `BatchValidationStats.performance`. */
  getPerformanceStats(): NonNullable<BatchValidationStats['performance']>;

  resetStats(): void;
}
|
|
945
|
+
|
|
946
|
+
// Programmatic Validators (fast, deterministic)
|
|
947
|
+
// Use these when you have Playwright page access and need fast feedback (<100ms)
|
|
948
|
+
|
|
949
|
+
/**
 * Computes the WCAG contrast ratio of two colors.
 *
 * @param color1 - First color, given as rgb, rgba, or hex
 * @param color2 - Second color, given as rgb, rgba, or hex
 * @returns The contrast ratio (1.0 to 21.0+)
 */
export function getContrastRatio(color1: string, color2: string): number;
|
|
957
|
+
|
|
958
|
+
/**
 * Outcome of a contrast check on one element.
 */
export interface ElementContrastResult {
  ratio: number;
  passes: boolean;
  foreground: string;
  background: string;
  /** Optional foreground color as a three-number tuple. */
  foregroundRgb?: [number, number, number];
  /** Optional background color as a three-number tuple. */
  backgroundRgb?: [number, number, number];
  error?: string;
  selector?: string;
}
|
|
971
|
+
|
|
972
|
+
/**
 * Checks the contrast ratio of a single element.
 *
 * @param page - Playwright page object
 * @param selector - CSS selector identifying the element
 * @param minRatio - Minimum required contrast ratio (default: 4.5 for WCAG-AA)
 * @returns The contrast check result
 */
export function checkElementContrast(page: any, selector: string, minRatio?: number): Promise<ElementContrastResult>;
|
|
985
|
+
|
|
986
|
+
/**
 * Outcome of a contrast check across all text elements.
 */
export interface AllTextContrastResult {
  total: number;
  passing: number;
  failing: number;
  /** Failing elements; note `ratio` is a string here but a number in `elements`. */
  violations: {
    element: string;
    ratio: string;
    required: number;
    foreground: string;
    background: string;
  }[];
  /** Optional per-element detail. */
  elements?: {
    tag: string;
    id: string;
    className: string;
    ratio: number;
    passes: boolean;
    foreground: string;
    background: string;
  }[];
}
|
|
1010
|
+
|
|
1011
|
+
/**
 * Checks contrast for every text element on the page.
 *
 * @param page - Playwright page object
 * @param minRatio - Minimum required contrast ratio (default: 4.5 for WCAG-AA)
 * @returns Contrast check results covering all text elements
 */
export function checkAllTextContrast(page: any, minRatio?: number): Promise<AllTextContrastResult>;
|
|
1022
|
+
|
|
1023
|
+
/**
 * Outcome of a keyboard navigation check.
 */
export interface KeyboardNavigationResult {
  keyboardAccessible: boolean;
  /** Count of focusable elements. */
  focusableElements: number;
  violations: {
    element: string;
    issue: string;
  }[];
  focusableSelectors: string[];
}
|
|
1035
|
+
|
|
1036
|
+
/**
 * Checks whether the page is accessible via keyboard navigation.
 *
 * @param page - Playwright page object
 * @returns The keyboard navigation check result
 */
export function checkKeyboardNavigation(
  page: any,
): Promise<KeyboardNavigationResult>;
|
|
1043
|
+
|
|
1044
|
+
/**
 * Options for programmatic state validation.
 */
export interface ProgrammaticStateOptions {
  /** String-to-string map. NOTE(review): presumably state key → CSS selector — confirm. */
  selectors?: Record<string, string>;
  tolerance?: number;
  /** Async callback that receives the page and resolves to a state value. */
  stateExtractor?: (page: any) => Promise<unknown>;
}
|
|
1052
|
+
|
|
1053
|
+
/**
 * Outcome of programmatic state validation.
 */
export interface ProgrammaticStateResult {
  matches: boolean;
  discrepancies: string[];
  /** Per-key geometry and visibility; an entry may be `null`. */
  visualState: Record<
    string,
    {
      x: number;
      y: number;
      width: number;
      height: number;
      visible: boolean;
    } | null
  >;
  expectedState: Record<string, unknown>;
  gameState?: unknown;
}
|
|
1069
|
+
|
|
1070
|
+
/**
 * Validates that the expected state matches its visual representation.
 *
 * @param page - Playwright page object
 * @param expectedState - The expected state object
 * @param options - Validation options
 * @returns The state validation result
 */
export function validateStateProgrammatic(
  page: any,
  expectedState: Record<string, unknown>,
  options?: ProgrammaticStateOptions,
): Promise<ProgrammaticStateResult>;
|
|
1083
|
+
|
|
1084
|
+
/**
 * Outcome of an element position validation.
 */
export interface ElementPositionResult {
  matches: boolean;
  /** Position and size that were found. */
  actual: {
    x: number;
    y: number;
    width: number;
    height: number;
  };
  /** Position and size that were requested; every field is optional. */
  expected: {
    x?: number;
    y?: number;
    width?: number;
    height?: number;
  };
  /** Per-axis difference; the size differences are optional. */
  diff: {
    x: number;
    y: number;
    width?: number;
    height?: number;
  };
  tolerance: number;
  error?: string;
  selector?: string;
}
|
|
1111
|
+
|
|
1112
|
+
/**
 * Validates that an element sits at the expected position.
 *
 * @param page - Playwright page object
 * @param selector - CSS selector identifying the element
 * @param expectedPosition - Expected position {x, y} or {x, y, width, height}
 * @param tolerance - Pixel tolerance (default: 5)
 * @returns The position validation result
 */
export function validateElementPosition(
  page: any,
  selector: string,
  // Same shape as the `expected` member of the result: optional x, y, width, height.
  expectedPosition: ElementPositionResult['expected'],
  tolerance?: number,
): Promise<ElementPositionResult>;
|
|
1132
|
+
|
|
1133
|
+
// Hybrid Validators (Programmatic + VLLM)
|
|
1134
|
+
// Combine programmatic data with semantic LLM evaluation
|
|
1135
|
+
|
|
1136
|
+
/**
 * Result of hybrid accessibility validation: a `ValidationResult` carrying
 * the programmatic contrast and keyboard findings alongside it.
 */
export interface AccessibilityHybridResult extends ValidationResult {
  programmaticData: {
    contrast: AllTextContrastResult;
    keyboard: KeyboardNavigationResult;
  };
}
|
|
1145
|
+
|
|
1146
|
+
/**
 * Hybrid accessibility validation: programmatic contrast/keyboard checks
 * combined with VLLM semantic evaluation.
 *
 * @param page - Playwright page object
 * @param screenshotPath - Path to the screenshot
 * @param minContrast - Minimum contrast ratio (default: 4.5)
 * @param options - Validation options
 * @returns Hybrid validation result including the programmatic data
 */
export function validateAccessibilityHybrid(
  page: any,
  screenshotPath: string,
  minContrast?: number,
  options?: ValidationContext,
): Promise<AccessibilityHybridResult>;
|
|
1162
|
+
|
|
1163
|
+
/**
 * Result of hybrid state validation: a `ValidationResult` carrying the
 * programmatic state findings alongside it.
 */
export interface StateHybridResult extends ValidationResult {
  programmaticData: {
    gameState?: unknown;
    /** Same shape as `ProgrammaticStateResult.visualState`. */
    visualState: ProgrammaticStateResult['visualState'];
    discrepancies: string[];
    matches: boolean;
  };
}
|
|
1180
|
+
|
|
1181
|
+
/**
 * Hybrid state validation: programmatic state extraction combined with
 * VLLM semantic evaluation.
 *
 * @param page - Playwright page object
 * @param screenshotPath - Path to the screenshot
 * @param expectedState - The expected state object
 * @param options - Validation options (programmatic options merged with validation context)
 * @returns Hybrid validation result including the programmatic data
 */
export function validateStateHybrid(
  page: any,
  screenshotPath: string,
  expectedState: Record<string, unknown>,
  options?: ProgrammaticStateOptions & ValidationContext,
): Promise<StateHybridResult>;
|
|
1197
|
+
|
|
1198
|
+
/**
 * Result of the generic hybrid validator: a `ValidationResult` carrying an
 * arbitrary record of programmatic data.
 */
export interface HybridValidationResult extends ValidationResult {
  programmaticData: Record<string, unknown>;
}
|
|
1204
|
+
|
|
1205
|
+
/**
 * Generic hybrid validator helper: combines arbitrary programmatic data
 * with VLLM evaluation.
 *
 * @param screenshotPath - Path to the screenshot
 * @param prompt - Base evaluation prompt
 * @param programmaticData - Programmatic validation data
 * @param options - Validation options
 * @returns Hybrid validation result including the programmatic data
 */
export function validateWithProgrammaticContext(
  screenshotPath: string,
  prompt: string,
  programmaticData: Record<string, unknown>,
  options?: ValidationContext,
): Promise<HybridValidationResult>;
|
|
1221
|
+
|