@arclabs561/ai-visual-test 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.secretsignore.example +20 -0
- package/CHANGELOG.md +360 -0
- package/CONTRIBUTING.md +63 -0
- package/DEPLOYMENT.md +80 -0
- package/LICENSE +22 -0
- package/README.md +142 -0
- package/SECURITY.md +108 -0
- package/api/health.js +34 -0
- package/api/validate.js +252 -0
- package/index.d.ts +1221 -0
- package/package.json +112 -0
- package/public/index.html +149 -0
- package/src/batch-optimizer.mjs +451 -0
- package/src/bias-detector.mjs +370 -0
- package/src/bias-mitigation.mjs +233 -0
- package/src/cache.mjs +433 -0
- package/src/config.mjs +268 -0
- package/src/constants.mjs +80 -0
- package/src/context-compressor.mjs +350 -0
- package/src/convenience.mjs +617 -0
- package/src/cost-tracker.mjs +257 -0
- package/src/cross-modal-consistency.mjs +170 -0
- package/src/data-extractor.mjs +232 -0
- package/src/dynamic-few-shot.mjs +140 -0
- package/src/dynamic-prompts.mjs +361 -0
- package/src/ensemble/index.mjs +53 -0
- package/src/ensemble-judge.mjs +366 -0
- package/src/error-handler.mjs +67 -0
- package/src/errors.mjs +167 -0
- package/src/experience-propagation.mjs +128 -0
- package/src/experience-tracer.mjs +487 -0
- package/src/explanation-manager.mjs +299 -0
- package/src/feedback-aggregator.mjs +248 -0
- package/src/game-goal-prompts.mjs +478 -0
- package/src/game-player.mjs +548 -0
- package/src/hallucination-detector.mjs +155 -0
- package/src/helpers/playwright.mjs +80 -0
- package/src/human-validation-manager.mjs +516 -0
- package/src/index.mjs +364 -0
- package/src/judge.mjs +929 -0
- package/src/latency-aware-batch-optimizer.mjs +192 -0
- package/src/load-env.mjs +159 -0
- package/src/logger.mjs +55 -0
- package/src/metrics.mjs +187 -0
- package/src/model-tier-selector.mjs +221 -0
- package/src/multi-modal/index.mjs +36 -0
- package/src/multi-modal-fusion.mjs +190 -0
- package/src/multi-modal.mjs +524 -0
- package/src/natural-language-specs.mjs +1071 -0
- package/src/pair-comparison.mjs +277 -0
- package/src/persona/index.mjs +42 -0
- package/src/persona-enhanced.mjs +200 -0
- package/src/persona-experience.mjs +572 -0
- package/src/position-counterbalance.mjs +140 -0
- package/src/prompt-composer.mjs +375 -0
- package/src/render-change-detector.mjs +583 -0
- package/src/research-enhanced-validation.mjs +436 -0
- package/src/retry.mjs +152 -0
- package/src/rubrics.mjs +231 -0
- package/src/score-tracker.mjs +277 -0
- package/src/smart-validator.mjs +447 -0
- package/src/spec-config.mjs +106 -0
- package/src/spec-templates.mjs +347 -0
- package/src/specs/index.mjs +38 -0
- package/src/temporal/index.mjs +102 -0
- package/src/temporal-adaptive.mjs +163 -0
- package/src/temporal-batch-optimizer.mjs +222 -0
- package/src/temporal-constants.mjs +69 -0
- package/src/temporal-context.mjs +49 -0
- package/src/temporal-decision-manager.mjs +271 -0
- package/src/temporal-decision.mjs +669 -0
- package/src/temporal-errors.mjs +58 -0
- package/src/temporal-note-pruner.mjs +173 -0
- package/src/temporal-preprocessor.mjs +543 -0
- package/src/temporal-prompt-formatter.mjs +219 -0
- package/src/temporal-validation.mjs +159 -0
- package/src/temporal.mjs +415 -0
- package/src/type-guards.mjs +311 -0
- package/src/uncertainty-reducer.mjs +470 -0
- package/src/utils/index.mjs +175 -0
- package/src/validation-framework.mjs +321 -0
- package/src/validation-result-normalizer.mjs +64 -0
- package/src/validation.mjs +243 -0
- package/src/validators/accessibility-programmatic.mjs +345 -0
- package/src/validators/accessibility-validator.mjs +223 -0
- package/src/validators/batch-validator.mjs +143 -0
- package/src/validators/hybrid-validator.mjs +268 -0
- package/src/validators/index.mjs +34 -0
- package/src/validators/prompt-builder.mjs +218 -0
- package/src/validators/rubric.mjs +85 -0
- package/src/validators/state-programmatic.mjs +260 -0
- package/src/validators/state-validator.mjs +291 -0
- package/vercel.json +27 -0
|
@@ -0,0 +1,524 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Multi-Modal Validator
|
|
3
|
+
*
|
|
4
|
+
* Enhanced validation using:
|
|
5
|
+
* 1. Multi-modal inputs: Screenshots + rendered HTML/CSS + game state + principles
|
|
6
|
+
* 2. Multi-perspective: Multiple personas evaluating same state
|
|
7
|
+
* 3. Temporal: Screenshots at different Hz for animations
|
|
8
|
+
*
|
|
9
|
+
* Note: This module requires Playwright Page object.
|
|
10
|
+
* It's designed to work with @playwright/test but doesn't require it as a hard dependency.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { ValidationError } from './errors.mjs';
|
|
14
|
+
import { warn } from './logger.mjs';
|
|
15
|
+
|
|
16
|
+
/**
 * Extract rendered HTML/CSS for dual-view validation
 *
 * Captures both source code (HTML/CSS) and rendered state for multi-modal validation.
 * This enables validation against both the "source of truth" (code) and the "rendered output" (visuals).
 *
 * Dual View Benefits:
 * - Detect CSS rendering issues (code says one thing, visual shows another)
 * - Validate structural correctness (DOM matches expectations)
 * - Check computed styles vs. source styles
 * - Identify layout bugs (positioning, z-index, visibility)
 * - Verify accessibility attributes in code vs. visual rendering
 *
 * The returned object contains:
 * - `html`: page HTML truncated to `htmlLimit` characters
 * - `stylesheets`: per-sheet `{ href, rules }` (at most 100 rules each) or `{ href, error }`
 *   when the sheet's rules cannot be read
 * - `criticalCSS`: computed styles keyed by selector (first matching element only)
 * - `domStructure`: `{ body, head, mainElements }` summary of the document
 * - `timestamp`, `url`, `viewport`
 *
 * @param {any} page - Playwright page object (must provide `evaluate`, `content` and `url`)
 * @param {Object} [options] - Extraction options
 * @param {string[]} [options.selectors] - Custom selectors to extract (defaults to common patterns)
 * @param {number} [options.htmlLimit=10000] - Max HTML chars to extract (default 10k)
 * @param {boolean} [options.includeAllCSS=false] - When true, every computed property reported by
 *   `getComputedStyle` is added (keyed by its CSS property name) alongside the critical
 *   camelCase properties that are always present
 * @returns {Promise<import('./index.mjs').RenderedCode>} Rendered code structure
 * @throws {ValidationError} If page is not a valid Playwright Page object
 */
export async function extractRenderedCode(page, options = {}) {
  if (!page || typeof page.evaluate !== 'function') {
    throw new ValidationError('extractRenderedCode requires a Playwright Page object', {
      received: typeof page,
      hasEvaluate: typeof page?.evaluate === 'function'
    });
  }

  const {
    selectors = null, // Custom selectors, or null for auto-detection
    htmlLimit = 10000,
    includeAllCSS = false
  } = options ?? {};

  // Extract full HTML (source of truth)
  const html = await page.content();

  // Extract all stylesheets (source CSS)
  const stylesheets = await page.evaluate(() => {
    // Declared inside the callback because it runs in the page and cannot see outer variables
    const MAX_RULES_PER_SHEET = 100;
    const sheets = [];
    for (const sheet of document.styleSheets) {
      try {
        const rules = [];
        for (const rule of sheet.cssRules || []) {
          // Stop at the cap instead of serializing every rule and discarding the rest
          if (rules.length >= MAX_RULES_PER_SHEET) {
            break;
          }
          rules.push({
            selectorText: rule.selectorText,
            cssText: rule.cssText,
            style: rule.style
              ? Object.fromEntries(
                Array.from(rule.style).map(prop => [prop, rule.style.getPropertyValue(prop)])
              )
              : null
          });
        }
        sheets.push({ href: sheet.href, rules });
      } catch (e) {
        // Cross-origin stylesheets may throw
        sheets.push({ href: sheet.href, error: 'Cross-origin or inaccessible' });
      }
    }
    return sheets;
  });

  // Extract critical CSS (computed styles for key elements)
  // This is the "rendered" CSS (what actually applies, not source)
  const criticalCSS = await page.evaluate(({ customSelectors, includeAll }) => {
    const styles = {};

    // Auto-detect common selectors if not provided
    const selectorsToCheck = customSelectors || [
      'body',
      'main',
      'header',
      'footer',
      '[role="main"]',
      '[role="banner"]',
      '[role="contentinfo"]',
      'button',
      'a',
      'input',
      'form',
      '#app',
      '#root',
      '.container',
      '.main-content'
    ];

    for (const selector of selectorsToCheck) {
      try {
        const el = document.querySelector(selector);
        if (!el) {
          continue;
        }
        const computed = window.getComputedStyle(el);
        const entry = {};

        if (includeAll) {
          // Full dump, keyed by the property names the computed style enumerates
          for (const prop of Array.from(computed)) {
            entry[prop] = computed.getPropertyValue(prop);
          }
        }

        // Critical properties are always present under their camelCase names
        Object.assign(entry, {
          position: computed.position,
          top: computed.top,
          bottom: computed.bottom,
          left: computed.left,
          right: computed.right,
          width: computed.width,
          height: computed.height,
          backgroundColor: computed.backgroundColor,
          color: computed.color,
          display: computed.display,
          visibility: computed.visibility,
          zIndex: computed.zIndex,
          transform: computed.transform,
          opacity: computed.opacity,
          fontSize: computed.fontSize,
          fontFamily: computed.fontFamily,
          lineHeight: computed.lineHeight,
          margin: computed.margin,
          padding: computed.padding,
          border: computed.border,
          borderRadius: computed.borderRadius,
          boxShadow: computed.boxShadow,
          overflow: computed.overflow,
          textAlign: computed.textAlign
        });

        styles[selector] = entry;
      } catch (e) {
        // Skip invalid selectors
      }
    }

    return styles;
  }, { customSelectors: selectors, includeAll: Boolean(includeAllCSS) });

  // Extract DOM structure (text-encoded representation)
  const domStructure = await page.evaluate(() => {
    const collectAttributes = (node) => Array.from(node?.attributes || []).reduce((acc, attr) => {
      acc[attr.name] = attr.value;
      return acc;
    }, {});

    const structure = {
      body: {
        tagName: document.body?.tagName,
        children: document.body?.children?.length || 0,
        textContent: document.body?.textContent?.substring(0, 500) || '',
        attributes: collectAttributes(document.body)
      },
      head: {
        title: document.title,
        meta: Array.from(document.querySelectorAll('meta')).map(m => ({
          name: m.getAttribute('name') || m.getAttribute('property'),
          content: m.getAttribute('content')
        })),
        links: Array.from(document.querySelectorAll('link[rel="stylesheet"]')).map(l => ({
          href: l.href,
          rel: l.rel
        }))
      },
      mainElements: []
    };

    // Extract key elements (auto-detect)
    const keySelectors = [
      'main', '[role="main"]', '#app', '#root',
      'header', '[role="banner"]',
      'footer', '[role="contentinfo"]',
      'nav', '[role="navigation"]',
      'article', '[role="article"]',
      'section'
    ];

    for (const selector of keySelectors) {
      try {
        const el = document.querySelector(selector);
        if (!el) {
          continue;
        }
        // Resolve the computed style once instead of once per property
        const computed = window.getComputedStyle(el);
        const rect = el.getBoundingClientRect();
        structure.mainElements.push({
          selector,
          tagName: el.tagName,
          id: el.id,
          className: el.className,
          textContent: el.textContent?.substring(0, 200) || '',
          attributes: collectAttributes(el),
          // Copied into a plain object so the result does not depend on how the
          // evaluate bridge serializes a DOMRect instance
          boundingRect: {
            x: rect.x,
            y: rect.y,
            width: rect.width,
            height: rect.height,
            top: rect.top,
            right: rect.right,
            bottom: rect.bottom,
            left: rect.left
          },
          computedStyles: {
            display: computed.display,
            visibility: computed.visibility,
            position: computed.position
          }
        });
      } catch (e) {
        // Skip invalid selectors
      }
    }

    return structure;
  });

  // Only `evaluate` is validated above; tolerate page-like objects without
  // `viewportSize` and avoid calling it twice
  const viewport = typeof page.viewportSize === 'function' ? page.viewportSize() : null;

  return {
    // Source code (text-encoded HTML)
    html: html.substring(0, htmlLimit),

    // Source CSS (from stylesheets)
    stylesheets,

    // Rendered CSS (computed styles - what actually applies)
    criticalCSS,

    // DOM structure (text-encoded representation)
    domStructure,

    // Metadata
    timestamp: Date.now(),
    url: page.url(),
    viewport: {
      width: viewport?.width || 0,
      height: viewport?.height || 0
    }
  };
}
|
|
233
|
+
|
|
234
|
+
/**
 * Capture temporal screenshots (for animations)
 *
 * Takes `floor(duration / (1000 / fps))` screenshots, waiting between frames via
 * `page.waitForTimeout`. A frame that fails to capture is logged and skipped; the
 * pause for that frame still happens so later frames keep their spacing.
 *
 * For rates above 30 fps the pause is shortened by the time the screenshot itself
 * took; at lower rates the full interval is waited after every frame.
 *
 * @param {any} page - Playwright page object
 * @param {number} [fps=2] - Frames per second to capture (non-positive or non-finite yields no frames)
 * @param {number} [duration=2000] - Duration in milliseconds
 * @param {Object} [options] - Capture options
 * @param {boolean} [options.optimizeForSpeed=false] - For rates above 30 fps, capture JPEG at
 *   quality 70 (file extension `.jpg`) instead of PNG to reduce per-frame overhead
 * @param {string} [options.outputDir='test-results'] - Directory prefix for the screenshot paths
 * @returns {Promise<import('./index.mjs').TemporalScreenshot[]>} Array of `{ path, frame, timestamp }`
 *   for the frames that were captured successfully
 * @throws {ValidationError} If page is not a valid Playwright Page object
 */
export async function captureTemporalScreenshots(page, fps = 2, duration = 2000, options = {}) {
  if (!page || typeof page.screenshot !== 'function') {
    throw new ValidationError('captureTemporalScreenshots requires a Playwright Page object', {
      received: typeof page,
      hasScreenshot: typeof page?.screenshot === 'function'
    });
  }

  const {
    optimizeForSpeed = false, // Optimize screenshot quality for high FPS
    outputDir = 'test-results'
  } = options ?? {};

  const rate = Number(fps);
  const span = Number(duration);

  // Non-positive values produce no frames; non-finite ones would make the frame
  // count unbounded, so both return early
  if (!Number.isFinite(rate) || rate <= 0 || !Number.isFinite(span) || span <= 0) {
    return [];
  }

  const screenshots = [];
  const interval = 1000 / rate; // ms between frames
  const frames = Math.floor(span / interval);
  const highFps = rate > 30;

  // `quality` is only valid for JPEG screenshots; sending it with type 'png' makes
  // every capture fail, so the speed-optimized mode switches format as well
  const useJpeg = Boolean(optimizeForSpeed) && highFps;
  const screenshotOptions = useJpeg ? { type: 'jpeg', quality: 70 } : { type: 'png' };
  const extension = useJpeg ? 'jpg' : 'png';

  for (let i = 0; i < frames; i++) {
    const timestamp = Date.now();
    const path = `${outputDir}/temporal-${timestamp}-${i}.${extension}`;

    try {
      await page.screenshot({ ...screenshotOptions, path });
      screenshots.push({ path, frame: i, timestamp });
    } catch (error) {
      warn(`[Temporal Capture] Screenshot ${i} failed: ${error?.message ?? error}`);
      // Continue with next frame
    }

    // The pause runs whether or not the capture succeeded, so a failing frame
    // does not collapse the spacing of the frames that follow
    const waitTime = highFps
      ? Math.max(0, interval - (Date.now() - timestamp)) // compensate for capture time
      : interval;

    if (waitTime > 0) {
      try {
        await page.waitForTimeout(waitTime);
      } catch (error) {
        warn(`[Temporal Capture] Wait after frame ${i} failed: ${error?.message ?? error}`);
      }
    }
  }

  return screenshots;
}
|
|
298
|
+
|
|
299
|
+
/**
 * Multi-perspective evaluation
 * Multiple personas evaluate the same state
 *
 * Every persona is evaluated concurrently. For each one a prompt is built with
 * `buildPersonaPrompt` and `validateFn` is called with the screenshot path, that
 * prompt and a context object. The context contains the game state's own keys
 * (so values such as a goal flow through), plus `gameState`, `renderedCode`,
 * `persona`, `perspective`, `focus`, and `goal` when the persona defines one.
 * The persona-derived keys take precedence over same-named game-state keys.
 *
 * A persona whose evaluation throws, rejects, or returns a falsy value is logged
 * (on failure) and left out of the result; it never fails the whole call.
 *
 * @param {(path: string, prompt: string, context: import('./index.mjs').ValidationContext) => Promise<import('./index.mjs').ValidationResult>} validateFn - Function to validate screenshot
 * @param {string} screenshotPath - Path to screenshot
 * @param {import('./index.mjs').RenderedCode} renderedCode - Rendered code structure
 * @param {Record<string, unknown>} [gameState={}] - Game state (optional)
 * @param {import('./index.mjs').Persona[] | null} [personas=null] - Array of persona objects (optional)
 * @returns {Promise<import('./index.mjs').PerspectiveEvaluation[]>} Array of perspective evaluations
 * @throws {ValidationError} If validateFn is not a function
 */
export async function multiPerspectiveEvaluation(validateFn, screenshotPath, renderedCode, gameState = {}, personas = null) {
  if (!validateFn || typeof validateFn !== 'function') {
    throw new ValidationError('multiPerspectiveEvaluation requires a validate function', {
      received: typeof validateFn
    });
  }

  // Default personas if not provided
  const defaultPersonas = [
    {
      name: 'Brutalist Designer',
      perspective: 'I evaluate based on brutalist design principles. Function over decoration. High contrast. Minimal UI.',
      focus: ['brutalist', 'contrast', 'minimalism', 'function']
    },
    {
      name: 'Accessibility Advocate',
      perspective: 'I evaluate based on accessibility standards. WCAG compliance. Keyboard navigation. Screen reader support.',
      focus: ['accessibility', 'wcag', 'keyboard', 'screen-reader']
    },
    {
      name: 'Queer Community Member',
      perspective: 'I evaluate based on queer community values. Inclusivity. Representation. Safe space.',
      focus: ['inclusivity', 'representation', 'community', 'values']
    },
    {
      name: 'Game Designer',
      perspective: 'I evaluate based on game design principles. Game feel. Mechanics. Balance.',
      focus: ['game-feel', 'mechanics', 'balance', 'polish']
    },
    {
      name: 'Product Purpose Validator',
      perspective: 'I evaluate based on product purpose alignment. Primary purpose clarity. Easter egg appropriateness.',
      focus: ['purpose', 'clarity', 'easter-egg', 'alignment']
    }
  ];

  const personasToUse = personas || defaultPersonas;

  const evaluations = await Promise.all(
    personasToUse.map(async (persona) => {
      try {
        // Build prompt with persona perspective
        const prompt = buildPersonaPrompt(persona, renderedCode, gameState);

        // Support variable goals in context for cohesive integration.
        // Game-state keys are spread first so they cannot overwrite the
        // persona identity or the structured inputs below.
        const evaluationContext = {
          ...gameState,
          gameState,
          renderedCode,
          persona: persona.name,
          perspective: persona.perspective,
          focus: persona.focus
        };

        // If persona has a goal property, pass it through
        if (persona.goal) {
          evaluationContext.goal = persona.goal;
        }

        // `await` inside try handles async rejections, synchronous throws and
        // validators that return a plain value instead of a promise
        const evaluation = await validateFn(screenshotPath, prompt, evaluationContext);

        if (!evaluation) {
          return null;
        }

        return {
          persona: persona.name,
          perspective: persona.perspective,
          focus: persona.focus,
          evaluation
        };
      } catch (err) {
        warn(`[Multi-Modal] Perspective ${persona?.name} failed: ${err?.message ?? err}`);
        return null;
      }
    })
  );

  return evaluations.filter(e => e !== null);
}
|
|
388
|
+
|
|
389
|
+
/**
 * Build persona-specific prompt
 *
 * Produces the text prompt for one persona: the persona's name, perspective and
 * focus areas, followed by the pretty-printed DOM structure, computed styles and
 * game state, and the fixed evaluation task.
 *
 * @param {{ name: string, perspective: string, focus?: string[] | string }} persona - Persona to speak as
 * @param {{ domStructure?: unknown, criticalCSS?: unknown } | null | undefined} renderedCode -
 *   Rendered code structure; when absent the two code sections read "Not captured"
 * @param {unknown} gameState - Game state, serialized as JSON into the prompt
 * @returns {string} Prompt text
 */
function buildPersonaPrompt(persona, renderedCode, gameState) {
  // Personas supplied by callers may omit `focus`; do not fail the prompt for that
  const focusAreas = Array.isArray(persona.focus)
    ? persona.focus.join(', ')
    : String(persona.focus ?? '');

  // Rendered code is null when code capture is disabled by the caller
  const domStructureText = renderedCode
    ? JSON.stringify(renderedCode.domStructure, null, 2)
    : 'Not captured';
  const criticalCssText = renderedCode
    ? JSON.stringify(renderedCode.criticalCSS, null, 2)
    : 'Not captured';

  return `PERSONA PERSPECTIVE: ${persona.name}
${persona.perspective}

FOCUS AREAS: ${focusAreas}

RENDERED CODE ANALYSIS (DOM STRUCTURE):
${domStructureText}

CSS VALIDATION (COMPUTED STYLES):
${criticalCssText}

GAME STATE (IF APPLICABLE):
${JSON.stringify(gameState, null, 2)}

EVALUATION TASK:
Evaluate this state from your persona's perspective. Consider:
1. Visual appearance (from screenshot)
2. Code correctness (from rendered code - check positioning, structure, styles)
3. State consistency (does visual match code and game state?)
4. Principles alignment (does it match design principles and product purpose?)

Provide evaluation from your persona's perspective.`;
}
|
|
420
|
+
|
|
421
|
+
/**
 * Comprehensive multi-modal validation
 *
 * Runs, in order: a full screenshot, rendered-code extraction, game-state
 * extraction (`window.gameState`, or an inactive placeholder when the page does not
 * define it), temporal screenshots, multi-persona evaluation, structural code
 * checks, and aggregation of the persona results.
 *
 * Aggregation: `aggregatedScore` is the mean of the persona scores that are finite
 * numbers, or `null` when there are none; `aggregatedIssues` is the de-duplicated
 * union of all persona issues.
 *
 * The screenshot is written to `test-results/multimodal-<testName>-<timestamp>.png`,
 * where any run of characters in `testName` other than letters, digits, `.`, `_`
 * and `-` is replaced with `_` so the name cannot introduce path separators.
 *
 * @param {(path: string, prompt: string, context: import('./index.mjs').ValidationContext) => Promise<import('./index.mjs').ValidationResult>} validateFn - Function to validate screenshot
 * @param {any} page - Playwright page object
 * @param {string} testName - Test name
 * @param {{
 *   fps?: number;
 *   duration?: number;
 *   captureCode?: boolean;
 *   captureState?: boolean;
 *   multiPerspective?: boolean;
 * }} [options={}] - Validation options (`fps` of 0 or less disables temporal capture)
 * @returns {Promise<{
 *   screenshotPath: string;
 *   renderedCode: import('./index.mjs').RenderedCode | null;
 *   gameState: Record<string, unknown>;
 *   temporalScreenshots: import('./index.mjs').TemporalScreenshot[];
 *   perspectives: import('./index.mjs').PerspectiveEvaluation[];
 *   codeValidation: Record<string, boolean>;
 *   aggregatedScore: number | null;
 *   aggregatedIssues: string[];
 *   timestamp: number;
 * }>} Comprehensive validation result
 * @throws {ValidationError} If validateFn is not a function or page is invalid
 */
export async function multiModalValidation(validateFn, page, testName, options = {}) {
  if (!validateFn || typeof validateFn !== 'function') {
    throw new ValidationError('multiModalValidation requires a validate function', {
      received: typeof validateFn
    });
  }
  if (!page || typeof page.screenshot !== 'function') {
    throw new ValidationError('multiModalValidation requires a Playwright Page object', {
      received: typeof page,
      hasScreenshot: typeof page?.screenshot === 'function'
    });
  }

  const {
    fps = 2, // Frames per second for temporal sampling
    duration = 2000, // Duration in ms
    captureCode = true,
    captureState = true,
    multiPerspective = true
  } = options ?? {};

  // 1. Capture screenshot
  // The test name becomes part of a file name, so separators and other unusual
  // characters are collapsed to underscores
  const safeTestName = String(testName).replace(/[^A-Za-z0-9._-]+/g, '_');
  const screenshotPath = `test-results/multimodal-${safeTestName}-${Date.now()}.png`;
  await page.screenshot({ path: screenshotPath, type: 'png' });

  // 2. Extract rendered code
  const renderedCode = captureCode ? await extractRenderedCode(page) : null;

  // 3. Extract game state
  const gameState = captureState ? await page.evaluate(() => {
    return window.gameState || {
      gameActive: false,
      bricks: [],
      ball: null,
      paddle: null
    };
  }) : {};

  // 4. Capture temporal screenshots (for animations)
  const temporalScreenshots = fps > 0 ? await captureTemporalScreenshots(page, fps, duration) : [];

  // 5. Multi-perspective evaluation
  const perspectives = multiPerspective
    ? await multiPerspectiveEvaluation(validateFn, screenshotPath, renderedCode, gameState)
    : [];

  // 6. Code validation (structural checks)
  // NOTE(review): these checks read `prideParade`, `footer` and `paymentCode` from
  // the DOM structure; the extractor visible in this file only produces `body`,
  // `head` and `mainElements`, so with it every check is false - confirm whether
  // another producer supplies these keys.
  let codeValidation = {};
  if (renderedCode) {
    const dom = renderedCode.domStructure ?? {};
    const pinnedTo = (value) => value === '0px' || Boolean(value?.startsWith?.('calc'));
    const flagRows = dom.prideParade?.flagRowCount || 0;

    codeValidation = {
      // `pinnedTo` always yields a boolean, matching the documented return type
      prideParadePosition: pinnedTo(dom.prideParade?.computedTop),
      prideParadeFlagCount: flagRows >= 15,
      flagsDynamicallyGenerated: flagRows >= 15,
      footerPosition: pinnedTo(dom.footer?.computedBottom),
      footerStripeDynamicallyGenerated: dom.footer?.hasStripe === true,
      paymentCodeVisible: dom.paymentCode?.visible === true
    };
  }

  // 7. Aggregate evaluation
  // Only real numeric scores take part in the mean; a persona that produced no
  // score must not be counted as a zero
  const scores = perspectives
    .map(p => p.evaluation?.score)
    .filter(score => typeof score === 'number' && Number.isFinite(score));

  const aggregatedScore = scores.length > 0
    ? scores.reduce((sum, score) => sum + score, 0) / scores.length
    : null;

  const aggregatedIssues = [...new Set(perspectives.flatMap(p => p.evaluation?.issues || []))];

  return {
    screenshotPath,
    renderedCode,
    gameState,
    temporalScreenshots,
    perspectives,
    codeValidation,
    aggregatedScore,
    aggregatedIssues,
    timestamp: Date.now()
  };
}
|
|
524
|
+
|