@arclabs561/ai-visual-test 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.secretsignore.example +20 -0
- package/CHANGELOG.md +360 -0
- package/CONTRIBUTING.md +63 -0
- package/DEPLOYMENT.md +80 -0
- package/LICENSE +22 -0
- package/README.md +142 -0
- package/SECURITY.md +108 -0
- package/api/health.js +34 -0
- package/api/validate.js +252 -0
- package/index.d.ts +1221 -0
- package/package.json +112 -0
- package/public/index.html +149 -0
- package/src/batch-optimizer.mjs +451 -0
- package/src/bias-detector.mjs +370 -0
- package/src/bias-mitigation.mjs +233 -0
- package/src/cache.mjs +433 -0
- package/src/config.mjs +268 -0
- package/src/constants.mjs +80 -0
- package/src/context-compressor.mjs +350 -0
- package/src/convenience.mjs +617 -0
- package/src/cost-tracker.mjs +257 -0
- package/src/cross-modal-consistency.mjs +170 -0
- package/src/data-extractor.mjs +232 -0
- package/src/dynamic-few-shot.mjs +140 -0
- package/src/dynamic-prompts.mjs +361 -0
- package/src/ensemble/index.mjs +53 -0
- package/src/ensemble-judge.mjs +366 -0
- package/src/error-handler.mjs +67 -0
- package/src/errors.mjs +167 -0
- package/src/experience-propagation.mjs +128 -0
- package/src/experience-tracer.mjs +487 -0
- package/src/explanation-manager.mjs +299 -0
- package/src/feedback-aggregator.mjs +248 -0
- package/src/game-goal-prompts.mjs +478 -0
- package/src/game-player.mjs +548 -0
- package/src/hallucination-detector.mjs +155 -0
- package/src/helpers/playwright.mjs +80 -0
- package/src/human-validation-manager.mjs +516 -0
- package/src/index.mjs +364 -0
- package/src/judge.mjs +929 -0
- package/src/latency-aware-batch-optimizer.mjs +192 -0
- package/src/load-env.mjs +159 -0
- package/src/logger.mjs +55 -0
- package/src/metrics.mjs +187 -0
- package/src/model-tier-selector.mjs +221 -0
- package/src/multi-modal/index.mjs +36 -0
- package/src/multi-modal-fusion.mjs +190 -0
- package/src/multi-modal.mjs +524 -0
- package/src/natural-language-specs.mjs +1071 -0
- package/src/pair-comparison.mjs +277 -0
- package/src/persona/index.mjs +42 -0
- package/src/persona-enhanced.mjs +200 -0
- package/src/persona-experience.mjs +572 -0
- package/src/position-counterbalance.mjs +140 -0
- package/src/prompt-composer.mjs +375 -0
- package/src/render-change-detector.mjs +583 -0
- package/src/research-enhanced-validation.mjs +436 -0
- package/src/retry.mjs +152 -0
- package/src/rubrics.mjs +231 -0
- package/src/score-tracker.mjs +277 -0
- package/src/smart-validator.mjs +447 -0
- package/src/spec-config.mjs +106 -0
- package/src/spec-templates.mjs +347 -0
- package/src/specs/index.mjs +38 -0
- package/src/temporal/index.mjs +102 -0
- package/src/temporal-adaptive.mjs +163 -0
- package/src/temporal-batch-optimizer.mjs +222 -0
- package/src/temporal-constants.mjs +69 -0
- package/src/temporal-context.mjs +49 -0
- package/src/temporal-decision-manager.mjs +271 -0
- package/src/temporal-decision.mjs +669 -0
- package/src/temporal-errors.mjs +58 -0
- package/src/temporal-note-pruner.mjs +173 -0
- package/src/temporal-preprocessor.mjs +543 -0
- package/src/temporal-prompt-formatter.mjs +219 -0
- package/src/temporal-validation.mjs +159 -0
- package/src/temporal.mjs +415 -0
- package/src/type-guards.mjs +311 -0
- package/src/uncertainty-reducer.mjs +470 -0
- package/src/utils/index.mjs +175 -0
- package/src/validation-framework.mjs +321 -0
- package/src/validation-result-normalizer.mjs +64 -0
- package/src/validation.mjs +243 -0
- package/src/validators/accessibility-programmatic.mjs +345 -0
- package/src/validators/accessibility-validator.mjs +223 -0
- package/src/validators/batch-validator.mjs +143 -0
- package/src/validators/hybrid-validator.mjs +268 -0
- package/src/validators/index.mjs +34 -0
- package/src/validators/prompt-builder.mjs +218 -0
- package/src/validators/rubric.mjs +85 -0
- package/src/validators/state-programmatic.mjs +260 -0
- package/src/validators/state-validator.mjs +291 -0
- package/vercel.json +27 -0
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Programmatic State Validator
|
|
3
|
+
*
|
|
4
|
+
* Fast, deterministic state validation using direct state access and DOM inspection.
|
|
5
|
+
* Use this when you have Playwright page access and direct state access (e.g., window.gameState).
|
|
6
|
+
*
|
|
7
|
+
* For state extraction from screenshots (when you don't have direct state access), use StateValidator (VLLM-based).
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { ValidationError } from '../errors.mjs';
|
|
11
|
+
import { assertString, assertObject, assertNumber } from '../type-guards.mjs';
|
|
12
|
+
|
|
13
|
+
/**
 * Validate that the page's state and its rendered DOM agree with an expected state.
 *
 * Two independent checks feed a single `discrepancies` list:
 *  1. The state returned by the extractor (default: `window.gameState`) is deep-compared
 *     with `expectedState` via `compareObjects`. Skipped when the extractor does not
 *     return an object.
 *  2. For every key of `expectedState` that also has an entry in `options.selectors`, the
 *     element's bounding rect is read from the DOM and its `x` / `y` are compared with the
 *     expected numeric `x` / `y` (other fields of the expected entry are not compared here).
 *
 * @param {any} page - Playwright page object (must expose `evaluate`)
 * @param {object} expectedState - Expected state object
 * @param {object} options - Validation options
 * @param {object} options.selectors - Map of state keys to CSS selectors (e.g., { ball: '#game-ball', paddle: '#game-paddle' })
 * @param {number} options.tolerance - Tolerance for numeric comparison (default: 5; 0 means exact match)
 * @param {function} options.stateExtractor - Optional function `(page) => state` (default: reads window.gameState)
 * @returns {Promise<{matches: boolean, discrepancies: string[], visualState: object, expectedState: object, gameState: any}>}
 * @throws {ValidationError} If page has no `evaluate` function or tolerance is not a non-negative number
 */
export async function validateStateProgrammatic(page, expectedState, options = {}) {
  if (!page || typeof page.evaluate !== 'function') {
    throw new ValidationError('validateStateProgrammatic requires a Playwright Page object', {
      received: typeof page,
      hasEvaluate: typeof page?.evaluate === 'function'
    });
  }

  assertObject(expectedState, 'expectedState');

  const selectors = options.selectors || {};
  // `??` instead of `||`: a tolerance of 0 is a legitimate "exact match" request and must
  // not be silently replaced by the default of 5.
  const tolerance = options.tolerance ?? 5;

  if (typeof tolerance !== 'number' || Number.isNaN(tolerance) || tolerance < 0) {
    throw new ValidationError('tolerance must be a non-negative number', { received: tolerance });
  }

  // A non-function stateExtractor falls back to reading window.gameState.
  const readGlobalState = (target) => target.evaluate(() => window.gameState || null);
  const stateExtractor = typeof options.stateExtractor === 'function'
    ? options.stateExtractor
    : readGlobalState;
  const gameState = await stateExtractor(page);

  // Runs in the browser: collect the geometry and visibility of every selected element.
  const visualState = await page.evaluate(({ selectors }) => {
    const state = {};

    for (const [key, selector] of Object.entries(selectors)) {
      const element = document.querySelector(selector);
      if (element) {
        const rect = element.getBoundingClientRect();
        const style = window.getComputedStyle(element);
        state[key] = {
          x: rect.x,
          y: rect.y,
          width: rect.width,
          height: rect.height,
          visible: rect.width > 0 && rect.height > 0 && style.visibility !== 'hidden' && style.display !== 'none'
        };
      } else {
        state[key] = null;
      }
    }

    return state;
  }, { selectors });

  const discrepancies = [];

  // Check 1: extracted state vs expected state.
  if (gameState && typeof gameState === 'object') {
    compareObjects(gameState, expectedState, '', discrepancies, tolerance);
  }

  // Check 2: rendered position vs expected position, only for keys that have a selector.
  for (const [key, expected] of Object.entries(expectedState)) {
    const selector = selectors[key];
    if (!selector) {
      continue;
    }

    const actual = visualState[key];
    if (!actual) {
      discrepancies.push(`${key}: Element not found (selector: ${selector})`);
      continue;
    }

    if (!actual.visible) {
      discrepancies.push(`${key}: Element not visible (selector: ${selector})`);
      continue;
    }

    // A null or primitive expected entry carries no coordinates; skip it instead of
    // crashing on property access.
    if (expected === null || typeof expected !== 'object') {
      continue;
    }

    for (const axis of ['x', 'y']) {
      if (typeof expected[axis] !== 'number') {
        continue;
      }
      const diff = Math.abs(actual[axis] - expected[axis]);
      if (diff > tolerance) {
        discrepancies.push(`${key}.${axis}: Expected ${expected[axis]}, got ${actual[axis]} (diff: ${diff}px, tolerance: ${tolerance}px)`);
      }
    }
  }

  return {
    matches: discrepancies.length === 0,
    discrepancies,
    visualState,
    expectedState,
    gameState
  };
}
|
|
124
|
+
|
|
125
|
+
/**
 * Validate that an element's bounding rect matches an expected position (and optionally size).
 *
 * `x` and `y` are always compared; a missing (or otherwise falsy) expected `x` / `y` is
 * treated as 0. `width` and `height` are compared only when present on `expectedPosition`.
 * Every compared dimension must be within `tolerance` for the result to match.
 *
 * When no element matches the selector the result carries `error: 'Element not found'`
 * with `actual` and `diff` set to null, so the result has the same fields in both outcomes.
 *
 * @param {any} page - Playwright page object (must expose `evaluate`)
 * @param {string} selector - CSS selector for element
 * @param {object} expectedPosition - Expected position {x, y} or {x, y, width, height}
 * @param {number} tolerance - Pixel tolerance (default: 5)
 * @returns {Promise<{matches: boolean, actual: object|null, expected: object, diff: object|null, tolerance: number, error?: string, selector?: string}>}
 * @throws {ValidationError} If page has no `evaluate` function or tolerance is negative / NaN
 */
export async function validateElementPosition(page, selector, expectedPosition, tolerance = 5) {
  if (!page || typeof page.evaluate !== 'function') {
    throw new ValidationError('validateElementPosition requires a Playwright Page object', {
      received: typeof page,
      hasEvaluate: typeof page?.evaluate === 'function'
    });
  }

  assertString(selector, 'selector');
  assertObject(expectedPosition, 'expectedPosition');
  assertNumber(tolerance, 'tolerance');

  if (tolerance < 0 || Number.isNaN(tolerance)) {
    throw new ValidationError('tolerance must be a non-negative number', { received: tolerance });
  }

  // Runs in the browser: read the element's bounding rect, or null when absent.
  const actual = await page.evaluate((sel) => {
    const element = document.querySelector(sel);
    if (!element) return null;
    const rect = element.getBoundingClientRect();
    return { x: rect.x, y: rect.y, width: rect.width, height: rect.height };
  }, selector);

  if (!actual) {
    return {
      matches: false,
      error: 'Element not found',
      selector,
      actual: null,
      expected: expectedPosition,
      diff: null,
      tolerance
    };
  }

  const diff = {
    x: Math.abs(actual.x - (expectedPosition.x || 0)),
    y: Math.abs(actual.y - (expectedPosition.y || 0))
  };

  // Size is optional: only dimensions the caller specified take part in the comparison.
  for (const dimension of ['width', 'height']) {
    if (expectedPosition[dimension] !== undefined) {
      diff[dimension] = Math.abs(actual[dimension] - expectedPosition[dimension]);
    }
  }

  const matches = Object.values(diff).every((delta) => delta <= tolerance);

  return {
    matches,
    actual,
    expected: expectedPosition,
    diff,
    tolerance
  };
}
|
|
187
|
+
|
|
188
|
+
/**
 * Recursive deep-comparison helper that appends human-readable messages to `discrepancies`.
 *
 * Rules, in order:
 *  - recursion deeper than 100 levels is reported and abandoned;
 *  - differing `typeof` is a type mismatch;
 *  - two non-null objects are compared structurally: arrays index by index (after a length
 *    check), plain objects over the union of their keys, and an array on one side with a
 *    non-array on the other is reported as a mismatch;
 *  - numbers match when they differ by at most `tolerance`; NaN only matches NaN and
 *    non-finite values must be strictly equal;
 *  - everything else (including null on either side) is compared with `!==`.
 *
 * @param {unknown} extracted - Extracted state
 * @param {unknown} expected - Expected state
 * @param {string} path - Current path in object tree (used as the message prefix)
 * @param {string[]} discrepancies - Array to collect discrepancies (mutated)
 * @param {number} tolerance - Tolerance for numeric comparisons
 * @param {number} depth - Current recursion depth (prevents stack overflow)
 */
function compareObjects(extracted, expected, path, discrepancies, tolerance, depth = 0) {
  // Prevent stack overflow on deeply nested objects
  if (depth > 100) {
    discrepancies.push(`${path}: Maximum comparison depth (100) exceeded - possible circular reference or extremely deep nesting`);
    return;
  }

  if (typeof expected !== typeof extracted) {
    discrepancies.push(`${path}: Type mismatch (expected ${typeof expected}, got ${typeof extracted})`);
    return;
  }

  if (typeof expected === 'object' && expected !== null && extracted !== null) {
    if (Array.isArray(expected)) {
      if (!Array.isArray(extracted)) {
        discrepancies.push(`${path}: Expected array, got ${typeof extracted}`);
        return;
      }
      if (expected.length !== extracted.length) {
        discrepancies.push(`${path}: Array length mismatch (expected ${expected.length}, got ${extracted.length})`);
      }
      expected.forEach((item, i) => {
        compareObjects(extracted[i], item, `${path}[${i}]`, discrepancies, tolerance, depth + 1);
      });
      return;
    }

    // Mirror of the check above: without it `{}` vs `[]` (or `{0: 'a'}` vs `['a']`) would
    // be compared key by key and silently pass.
    if (Array.isArray(extracted)) {
      discrepancies.push(`${path}: Expected object, got array`);
      return;
    }

    const allKeys = new Set([...Object.keys(expected), ...Object.keys(extracted)]);
    for (const key of allKeys) {
      const newPath = path ? `${path}.${key}` : key;
      if (!(key in expected)) {
        discrepancies.push(`${newPath}: Unexpected key in extracted state`);
      } else if (!(key in extracted)) {
        discrepancies.push(`${newPath}: Missing key in extracted state`);
      } else {
        compareObjects(extracted[key], expected[key], newPath, discrepancies, tolerance, depth + 1);
      }
    }
    return;
  }

  // typeof extracted equals typeof expected here, so one check covers both operands.
  if (typeof expected === 'number') {
    const expectedIsNaN = Number.isNaN(expected);
    const extractedIsNaN = Number.isNaN(extracted);
    if (expectedIsNaN || extractedIsNaN) {
      if (!(expectedIsNaN && extractedIsNaN)) {
        discrepancies.push(`${path}: NaN value detected (expected ${expected}, got ${extracted})`);
      }
      return;
    }

    // Infinity cannot be compared by distance (Infinity - Infinity is NaN).
    if (!Number.isFinite(expected) || !Number.isFinite(extracted)) {
      if (expected !== extracted) {
        discrepancies.push(`${path}: Infinity value mismatch (expected ${expected}, got ${extracted})`);
      }
      return;
    }

    const diff = Math.abs(extracted - expected);
    if (diff > tolerance) {
      discrepancies.push(`${path}: Value differs by ${diff} (expected ${expected}, got ${extracted}, tolerance: ${tolerance})`);
    }
    return;
  }

  if (extracted !== expected) {
    discrepancies.push(`${path}: Value mismatch (expected ${expected}, got ${extracted})`);
  }
}
|
|
260
|
+
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* State Validator
|
|
3
|
+
*
|
|
4
|
+
* Generic state validation for any structured state (game state, UI state, form state, etc.)
|
|
5
|
+
*
|
|
6
|
+
* Provides:
|
|
7
|
+
* - Configurable state extraction
|
|
8
|
+
* - Deep comparison with tolerance
|
|
9
|
+
* - Extensible via injectable functions (stateExtractor, stateComparator, promptBuilder)
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { validateScreenshot } from '../judge.mjs';
|
|
13
|
+
import { ValidationError, StateMismatchError } from '../errors.mjs';
|
|
14
|
+
import { assertString, assertObject } from '../type-guards.mjs';
|
|
15
|
+
|
|
16
|
+
/**
 * Generic state validator for any structured state validation
 * Works with game state, UI state, form state, etc.
 *
 * The screenshot judge, the state extractor and the state comparator are all injectable
 * through the constructor options; the defaults are `validateScreenshot` from judge.mjs,
 * `defaultStateExtractor` and `defaultStateComparator`.
 */
export class StateValidator {
  /**
   * @param {object} options
   * @param {number} [options.tolerance=5] - Non-negative numeric tolerance (0 means exact match)
   * @param {function} [options.validateScreenshot] - Replacement for the screenshot judge
   * @param {function} [options.stateExtractor] - `(result, expectedState) => extractedState`
   * @param {function} [options.stateComparator] - `(extracted, expected, options) => { discrepancies }`
   * @throws {ValidationError} If tolerance is given but is not a non-negative number
   */
  constructor(options = {}) {
    if (options.tolerance !== undefined) {
      if (typeof options.tolerance !== 'number' || options.tolerance < 0 || Number.isNaN(options.tolerance)) {
        throw new ValidationError(
          'tolerance must be a non-negative number',
          { received: options.tolerance }
        );
      }
      this.tolerance = options.tolerance;
    } else {
      this.tolerance = 5; // pixels for position-based validation
    }

    this.validateScreenshot = options.validateScreenshot || validateScreenshot;
    this.stateExtractor = options.stateExtractor || this.defaultStateExtractor.bind(this);
    this.stateComparator = options.stateComparator || this.defaultStateComparator.bind(this);
  }

  /**
   * Static method for quick validation without instantiation.
   * The same `options` object configures the temporary validator and the validation call.
   *
   * @param {string} screenshotPath - Path to screenshot
   * @param {object} expectedState - Expected state object
   * @param {object} options - Validation options (tolerance, testType, etc.)
   * @returns {Promise<object>} Validation result with extracted state and comparison
   */
  static async validate(screenshotPath, expectedState, options = {}) {
    const validator = new StateValidator(options);
    return validator.validateState(screenshotPath, expectedState, options);
  }

  /**
   * Validate state matches visual representation
   *
   * Builds a prompt, runs the screenshot judge, extracts a state object from the judge's
   * result and compares it with `expectedState`.
   *
   * @param {string | string[]} screenshotPath - Path to screenshot(s) - supports multi-image for comparison
   * @param {object} expectedState - Expected state object
   * @param {object} options - Validation options
   * @param {function} options.promptBuilder - Custom prompt builder function
   * @param {object} options.context - Additional context, spread into the judge options
   * @param {boolean} options.throwOnMismatch - Pass `false` to get a result with
   *   `matches: false` instead of an exception; any other value (including omitted) throws
   * @returns {Promise<object>} Judge result plus `extractedState`, `expectedState`, `validation`, `matches`
   * @throws {StateMismatchError} On mismatch unless `options.throwOnMismatch === false`
   *   (see the review note in the catch block about how it propagates)
   * @throws {ValidationError} On invalid input, or wrapping any other failure
   */
  async validateState(screenshotPath, expectedState, options = {}) {
    // Input validation - support both single and array
    if (Array.isArray(screenshotPath)) {
      screenshotPath.forEach((path, i) => {
        assertString(path, `screenshotPath[${i}]`);
      });
    } else {
      assertString(screenshotPath, 'screenshotPath');
    }
    assertObject(expectedState, 'expectedState');

    if (!expectedState || typeof expectedState !== 'object') {
      throw new ValidationError(
        'expectedState must be a non-null object',
        { received: typeof expectedState, value: expectedState }
      );
    }

    const prompt = options.promptBuilder
      ? options.promptBuilder(expectedState, { tolerance: this.tolerance, ...options })
      : this.buildStatePrompt(expectedState, options);

    try {
      // Pass through all validateScreenshot options (useCache, timeout, provider, viewport, etc.)
      const screenshotOptions = {
        testType: options.testType || 'state-validation',
        expectedState,
        ...options.context,
        // Explicit top-level options win over the same keys inside options.context
        useCache: options.useCache !== undefined ? options.useCache : options.context?.useCache,
        timeout: options.timeout || options.context?.timeout,
        provider: options.provider || options.context?.provider,
        viewport: options.viewport || options.context?.viewport
      };

      // Support multi-image (array of screenshots) for comparison
      const result = await this.validateScreenshot(screenshotPath, prompt, screenshotOptions);

      const extractedState = this.stateExtractor(result, expectedState);

      // Per-call options (including tolerance) override the instance tolerance
      const validation = this.stateComparator(extractedState, expectedState, {
        tolerance: this.tolerance,
        ...options
      });

      const matches = validation.discrepancies.length === 0;

      // Throws by default; only an explicit `throwOnMismatch: false` suppresses it
      if (!matches && options.throwOnMismatch !== false) {
        throw new StateMismatchError(
          validation.discrepancies,
          extractedState,
          expectedState
        );
      }

      return {
        ...result,
        extractedState,
        expectedState,
        validation,
        matches
      };
    } catch (error) {
      // Re-throw ValidationError as-is, wrap others
      // NOTE(review): the StateMismatchError thrown above also lands here. It is re-thrown
      // unchanged only if it is a ValidationError subclass - confirm against errors.mjs,
      // otherwise callers receive a generic ValidationError instead.
      if (error instanceof ValidationError) {
        throw error;
      }
      throw new ValidationError(
        `State validation failed: ${error.message}`,
        { screenshotPath, expectedState, originalError: error.message }
      );
    }
  }

  /**
   * Build generic state validation prompt
   *
   * @param {object} expectedState - Serialized into the prompt as pretty-printed JSON
   * @param {object} options
   * @param {string} [options.stateDescription='current state'] - Noun phrase used in the first line
   * @param {string[]} [options.extractionTasks] - Numbered task list; three generic tasks by default
   * @returns {string} Prompt text
   */
  buildStatePrompt(expectedState, options = {}) {
    const stateDescription = options.stateDescription || 'current state';
    const extractionTasks = options.extractionTasks || [
      'Extract current state from screenshot',
      'Compare with expected state',
      'Report discrepancies'
    ];
    // State the tolerance even when it is 0, so the prompt says exact positions are required.
    const toleranceLine = typeof this.tolerance === 'number'
      ? `- Tolerance: ${this.tolerance}px for positions`
      : '';

    return `Extract ${stateDescription} from screenshot with precision:

EXPECTED STATE:
${JSON.stringify(expectedState, null, 2)}

EXTRACTION TASKS:
${extractionTasks.map((task, i) => `${i + 1}. ${task}`).join('\n')}

VALIDATION:
- Compare extracted state to expected state
- Report discrepancies with differences
${toleranceLine}

Return structured data with extracted state and validation results.`;
  }

  /**
   * Default state extractor - tries to extract from structured data or parse from text
   *
   * Order: `result.structuredData` when it is an object; otherwise the span from the first
   * `{` to the last `}` in `result.reasoning` (or `result.assessment`) parsed as JSON.
   *
   * @param {object} result - Judge result
   * @param {object} expectedState - Unused by the default implementation
   * @returns {object|null} Extracted state, or null when nothing could be extracted
   */
  defaultStateExtractor(result, expectedState) {
    const structured = result?.structuredData;
    if (typeof structured === 'object' && structured !== null) {
      return structured;
    }

    // Only string fields can be searched; a non-string reasoning/assessment is ignored
    // instead of raising a TypeError on `.match`.
    const text = [result?.reasoning, result?.assessment]
      .find((candidate) => typeof candidate === 'string' && candidate !== '') || '';
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (jsonMatch) {
      try {
        const parsed = JSON.parse(jsonMatch[0]);
        if (typeof parsed === 'object' && parsed !== null) {
          return parsed;
        }
      } catch (e) {
        // Deliberate best-effort: unparseable text means "no state found", reported as null below
      }
    }

    // Return null to indicate extraction failed
    // Note: null might be a valid state value, but this is the best we can do
    // without a sentinel value. Callers should check if extraction succeeded.
    return null;
  }

  /**
   * Default state comparator - deep comparison with tolerance for numeric values
   *
   * @param {unknown} extracted - Extracted state
   * @param {unknown} expected - Expected state
   * @param {object} options
   * @param {number} [options.tolerance] - Overrides the instance tolerance (0 is honoured)
   * @returns {{matches: boolean, discrepancies: string[]}}
   */
  defaultStateComparator(extracted, expected, options = {}) {
    if (!extracted || !expected) {
      return {
        matches: false,
        discrepancies: ['Missing state data']
      };
    }

    const discrepancies = [];
    // `??` instead of `||`: an explicit tolerance of 0 must not fall back to the instance value
    const tolerance = options.tolerance ?? this.tolerance;

    this.compareObjects(extracted, expected, '', discrepancies, tolerance);

    return {
      matches: discrepancies.length === 0,
      discrepancies
    };
  }

  /**
   * Recursive object comparison helper
   *
   * Arrays are compared index by index, plain objects over the union of their keys, and
   * numbers within `tolerance` (NaN only matches NaN, non-finite values must be equal).
   * Messages are appended to `discrepancies`.
   *
   * @param {unknown} extracted - Extracted state
   * @param {unknown} expected - Expected state
   * @param {string} path - Current path in object tree (used as the message prefix)
   * @param {string[]} discrepancies - Array to collect discrepancies (mutated)
   * @param {number} tolerance - Tolerance for numeric comparisons
   * @param {number} depth - Current recursion depth (prevents stack overflow)
   */
  compareObjects(extracted, expected, path, discrepancies, tolerance, depth = 0) {
    // Prevent stack overflow on deeply nested objects
    if (depth > 100) {
      discrepancies.push(`${path}: Maximum comparison depth (100) exceeded - possible circular reference or extremely deep nesting`);
      return;
    }

    if (typeof expected !== typeof extracted) {
      discrepancies.push(`${path}: Type mismatch (expected ${typeof expected}, got ${typeof extracted})`);
      return;
    }

    if (typeof expected === 'object' && expected !== null && extracted !== null) {
      if (Array.isArray(expected)) {
        if (!Array.isArray(extracted)) {
          discrepancies.push(`${path}: Expected array, got ${typeof extracted}`);
          return;
        }
        if (expected.length !== extracted.length) {
          discrepancies.push(`${path}: Array length mismatch (expected ${expected.length}, got ${extracted.length})`);
        }
        expected.forEach((item, i) => {
          this.compareObjects(extracted[i], item, `${path}[${i}]`, discrepancies, tolerance, depth + 1);
        });
        return;
      }

      // Mirror of the check above: without it `{}` vs `[]` would silently pass.
      if (Array.isArray(extracted)) {
        discrepancies.push(`${path}: Expected object, got array`);
        return;
      }

      const allKeys = new Set([...Object.keys(expected), ...Object.keys(extracted)]);
      for (const key of allKeys) {
        const newPath = path ? `${path}.${key}` : key;
        if (!(key in expected)) {
          discrepancies.push(`${newPath}: Unexpected key in extracted state`);
        } else if (!(key in extracted)) {
          discrepancies.push(`${newPath}: Missing key in extracted state`);
        } else {
          this.compareObjects(extracted[key], expected[key], newPath, discrepancies, tolerance, depth + 1);
        }
      }
      return;
    }

    // typeof extracted equals typeof expected here, so one check covers both operands.
    if (typeof expected === 'number') {
      const expectedIsNaN = Number.isNaN(expected);
      const extractedIsNaN = Number.isNaN(extracted);
      if (expectedIsNaN || extractedIsNaN) {
        if (!(expectedIsNaN && extractedIsNaN)) {
          discrepancies.push(`${path}: NaN value detected (expected ${expected}, got ${extracted})`);
        }
        return;
      }

      // Infinity cannot be compared by distance (Infinity - Infinity is NaN).
      if (!Number.isFinite(expected) || !Number.isFinite(extracted)) {
        if (expected !== extracted) {
          discrepancies.push(`${path}: Infinity value mismatch (expected ${expected}, got ${extracted})`);
        }
        return;
      }

      const diff = Math.abs(extracted - expected);
      if (diff > tolerance) {
        discrepancies.push(`${path}: Value differs by ${diff} (expected ${expected}, got ${extracted}, tolerance: ${tolerance})`);
      }
      return;
    }

    if (extracted !== expected) {
      discrepancies.push(`${path}: Value mismatch (expected ${expected}, got ${extracted})`);
    }
  }
}
|
|
291
|
+
|
package/vercel.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 2,
|
|
3
|
+
"builds": [
|
|
4
|
+
{
|
|
5
|
+
"src": "api/**/*.js",
|
|
6
|
+
"use": "@vercel/node"
|
|
7
|
+
}
|
|
8
|
+
],
|
|
9
|
+
"routes": [
|
|
10
|
+
{
|
|
11
|
+
"src": "/api/validate",
|
|
12
|
+
"dest": "/api/validate.js"
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"src": "/api/health",
|
|
16
|
+
"dest": "/api/health.js"
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"src": "/",
|
|
20
|
+
"dest": "/public/index.html"
|
|
21
|
+
}
|
|
22
|
+
],
|
|
23
|
+
"env": {
|
|
24
|
+
"NODE_ENV": "production"
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
|