@arclabs561/ai-visual-test 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.secretsignore.example +20 -0
- package/CHANGELOG.md +360 -0
- package/CONTRIBUTING.md +63 -0
- package/DEPLOYMENT.md +80 -0
- package/LICENSE +22 -0
- package/README.md +142 -0
- package/SECURITY.md +108 -0
- package/api/health.js +34 -0
- package/api/validate.js +252 -0
- package/index.d.ts +1221 -0
- package/package.json +112 -0
- package/public/index.html +149 -0
- package/src/batch-optimizer.mjs +451 -0
- package/src/bias-detector.mjs +370 -0
- package/src/bias-mitigation.mjs +233 -0
- package/src/cache.mjs +433 -0
- package/src/config.mjs +268 -0
- package/src/constants.mjs +80 -0
- package/src/context-compressor.mjs +350 -0
- package/src/convenience.mjs +617 -0
- package/src/cost-tracker.mjs +257 -0
- package/src/cross-modal-consistency.mjs +170 -0
- package/src/data-extractor.mjs +232 -0
- package/src/dynamic-few-shot.mjs +140 -0
- package/src/dynamic-prompts.mjs +361 -0
- package/src/ensemble/index.mjs +53 -0
- package/src/ensemble-judge.mjs +366 -0
- package/src/error-handler.mjs +67 -0
- package/src/errors.mjs +167 -0
- package/src/experience-propagation.mjs +128 -0
- package/src/experience-tracer.mjs +487 -0
- package/src/explanation-manager.mjs +299 -0
- package/src/feedback-aggregator.mjs +248 -0
- package/src/game-goal-prompts.mjs +478 -0
- package/src/game-player.mjs +548 -0
- package/src/hallucination-detector.mjs +155 -0
- package/src/helpers/playwright.mjs +80 -0
- package/src/human-validation-manager.mjs +516 -0
- package/src/index.mjs +364 -0
- package/src/judge.mjs +929 -0
- package/src/latency-aware-batch-optimizer.mjs +192 -0
- package/src/load-env.mjs +159 -0
- package/src/logger.mjs +55 -0
- package/src/metrics.mjs +187 -0
- package/src/model-tier-selector.mjs +221 -0
- package/src/multi-modal/index.mjs +36 -0
- package/src/multi-modal-fusion.mjs +190 -0
- package/src/multi-modal.mjs +524 -0
- package/src/natural-language-specs.mjs +1071 -0
- package/src/pair-comparison.mjs +277 -0
- package/src/persona/index.mjs +42 -0
- package/src/persona-enhanced.mjs +200 -0
- package/src/persona-experience.mjs +572 -0
- package/src/position-counterbalance.mjs +140 -0
- package/src/prompt-composer.mjs +375 -0
- package/src/render-change-detector.mjs +583 -0
- package/src/research-enhanced-validation.mjs +436 -0
- package/src/retry.mjs +152 -0
- package/src/rubrics.mjs +231 -0
- package/src/score-tracker.mjs +277 -0
- package/src/smart-validator.mjs +447 -0
- package/src/spec-config.mjs +106 -0
- package/src/spec-templates.mjs +347 -0
- package/src/specs/index.mjs +38 -0
- package/src/temporal/index.mjs +102 -0
- package/src/temporal-adaptive.mjs +163 -0
- package/src/temporal-batch-optimizer.mjs +222 -0
- package/src/temporal-constants.mjs +69 -0
- package/src/temporal-context.mjs +49 -0
- package/src/temporal-decision-manager.mjs +271 -0
- package/src/temporal-decision.mjs +669 -0
- package/src/temporal-errors.mjs +58 -0
- package/src/temporal-note-pruner.mjs +173 -0
- package/src/temporal-preprocessor.mjs +543 -0
- package/src/temporal-prompt-formatter.mjs +219 -0
- package/src/temporal-validation.mjs +159 -0
- package/src/temporal.mjs +415 -0
- package/src/type-guards.mjs +311 -0
- package/src/uncertainty-reducer.mjs +470 -0
- package/src/utils/index.mjs +175 -0
- package/src/validation-framework.mjs +321 -0
- package/src/validation-result-normalizer.mjs +64 -0
- package/src/validation.mjs +243 -0
- package/src/validators/accessibility-programmatic.mjs +345 -0
- package/src/validators/accessibility-validator.mjs +223 -0
- package/src/validators/batch-validator.mjs +143 -0
- package/src/validators/hybrid-validator.mjs +268 -0
- package/src/validators/index.mjs +34 -0
- package/src/validators/prompt-builder.mjs +218 -0
- package/src/validators/rubric.mjs +85 -0
- package/src/validators/state-programmatic.mjs +260 -0
- package/src/validators/state-validator.mjs +291 -0
- package/vercel.json +27 -0
|
@@ -0,0 +1,617 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* High-Level Convenience Functions
|
|
3
|
+
*
|
|
4
|
+
* Provides simplified APIs for common workflows, reducing boilerplate
|
|
5
|
+
* and making the library easier to use for common patterns.
|
|
6
|
+
*
|
|
7
|
+
* Based on common visual testing workflows and usage patterns.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { validateScreenshot } from './judge.mjs';
|
|
11
|
+
import { normalizeValidationResult } from './validation-result-normalizer.mjs';
|
|
12
|
+
import { experiencePageAsPersona, experiencePageWithPersonas } from './persona-experience.mjs';
|
|
13
|
+
import { extractRenderedCode, captureTemporalScreenshots, multiPerspectiveEvaluation } from './multi-modal.mjs';
|
|
14
|
+
import { aggregateTemporalNotes, formatNotesForPrompt } from './temporal.mjs';
|
|
15
|
+
import { aggregateMultiScale } from './temporal-decision.mjs';
|
|
16
|
+
import { generateGamePrompt, createGameGoal, createGameGoals } from './game-goal-prompts.mjs';
|
|
17
|
+
import { checkCrossModalConsistency, validateExperienceConsistency } from './cross-modal-consistency.mjs';
|
|
18
|
+
import { trackPropagation } from './experience-propagation.mjs';
|
|
19
|
+
import { ValidationError } from './errors.mjs';
|
|
20
|
+
import { log, warn } from './logger.mjs';
|
|
21
|
+
import { TEMPORAL_CONSTANTS } from './constants.mjs';
|
|
22
|
+
|
|
23
|
+
/**
 * Test gameplay with variable goals
 *
 * Complete workflow for testing games with variable goals/prompts.
 * Originally motivated by interactive web applications that require
 * real-time validation, variable goals, and temporal understanding.
 *
 * Handles persona experience, temporal capture, goal evaluation, and consistency checks.
 *
 * Supports interactive games:
 * - Games that activate from payment screens (not just standalone games)
 * - Game activation via keyboard shortcuts (e.g., 'g' key)
 * - Game state extraction (window.gameState)
 * - Temporal preprocessing for better performance
 *
 * Error handling: once the `url` check has passed, any failure in the workflow is
 * caught, logged with `warn`, and reported as `result.error` (the message only)
 * instead of being thrown. `result.aggregated` and `result.aggregatedMultiScale`
 * are always populated, with placeholder objects when no aggregation ran.
 *
 * @param {import('playwright').Page} page - Playwright page object
 * @param {Object} options - Test options
 * @param {string} options.url - Game URL (or page URL if game activates from page)
 * @param {string | Object | Array | Function} [options.goals] - Variable goals (string, object, array, or function)
 * @param {Array<Object>} [options.personas] - Personas to test with
 * @param {boolean} [options.captureTemporal] - Capture temporal screenshots
 * @param {number} [options.fps] - FPS for temporal capture
 * @param {number} [options.duration] - Duration for temporal capture (ms)
 * @param {boolean} [options.captureCode] - Extract rendered code
 * @param {boolean} [options.checkConsistency] - Check cross-modal consistency
 * @param {string} [options.gameActivationKey] - Keyboard key to activate game (e.g., 'g')
 * @param {string} [options.gameSelector] - Selector to wait for game activation (e.g., '#game-paddle')
 * @param {boolean} [options.useTemporalPreprocessing] - Use temporal preprocessing for better performance
 * @param {boolean} [options.play] - If true, actually play the game (uses playGame() internally)
 * @param {number} [options.maxSteps] - Play mode only: step limit forwarded to playGame() (falls back to 100)
 * @returns {Promise<Object>} Test results. In play mode this is whatever playGame() resolves to;
 *   otherwise an object with `url`, `goals`, `experiences`, `evaluations`, `aggregated`,
 *   `aggregatedMultiScale`, `consistency`, `propagation`, and, when applicable,
 *   `temporalScreenshots`, `processedTemporalNotes` and `error`.
 * @throws {ValidationError} If `options.url` is missing (not checked in play mode,
 *   where `url` is handed to playGame() as-is)
 */
export async function testGameplay(page, options = {}) {
  const {
    url,
    goals = ['fun', 'accessibility', 'performance'],
    personas = null,
    captureTemporal = false,
    fps = 2,
    duration = 5000,
    captureCode = true,
    checkConsistency = true,
    gameActivationKey = null, // e.g., 'g' to activate game
    gameSelector = null, // e.g., '#game-paddle' selector
    useTemporalPreprocessing = false,
    play = false
  } = options;

  // Play mode delegates everything to playGame(); only the first goal is used.
  if (play) {
    const { playGame } = await import('./game-player.mjs');
    const goal = Array.isArray(goals) ? goals[0] : goals;
    const goalString = typeof goal === 'string' ? goal : goal?.description || 'Play the game well';

    return await playGame(page, {
      goal: goalString,
      maxSteps: options.maxSteps || 100,
      fps: fps || 2,
      gameActivationKey,
      gameSelector,
      url
    });
  }

  if (!url) {
    throw new ValidationError('testGameplay: url is required', { function: 'testGameplay', parameter: 'url' });
  }

  log('[Convenience] Testing gameplay:', { url, goals, gameActivationKey, gameSelector });

  // A single goal (string/object/function) is treated as a one-element list.
  const goalArray = Array.isArray(goals) ? goals : [goals];

  const result = {
    url,
    goals: goalArray,
    experiences: [],
    evaluations: [],
    aggregated: null,
    consistency: null,
    propagation: []
  };

  try {
    // Navigate to game/page. A 'networkidle' timeout is thrown by Playwright and
    // ends up in result.error via the catch below.
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
    await page.waitForLoadState('networkidle');

    // Activate game if needed (for games that activate from other screens)
    if (gameActivationKey) {
      log(`[Convenience] Activating game with key: ${gameActivationKey}`);
      await page.keyboard.press(gameActivationKey);

      // Best effort: a missing selector is only warned about, not treated as fatal.
      if (gameSelector) {
        await page.waitForSelector(gameSelector, { timeout: 5000 }).catch(() => {
          warn(`[Convenience] Game selector ${gameSelector} not found after activation`);
        });
      }

      // Wait a bit for game to initialize
      await page.waitForTimeout(500);
    }

    // Extract rendered code
    let renderedCode = null;
    if (captureCode) {
      renderedCode = await extractRenderedCode(page);
      trackPropagation('capture', { renderedCode }, 'Captured HTML/CSS for gameplay test');
    }

    // Extract game state from window.gameState. The explicit defaults come first
    // so that any value actually present on the state object (spread last) wins.
    const gameState = await page.evaluate(() => {
      const state = window.gameState || {};
      return {
        gameActive: state.gameActive || false,
        score: state.score || 0,
        level: state.level || 0,
        lives: state.lives || 3,
        ball: state.ball || null,
        paddle: state.paddle || null,
        bricks: state.bricks || null,
        // Include any other game state properties
        ...state
      };
    });

    // Experience with personas (if provided), otherwise one default tester persona
    if (personas && personas.length > 0) {
      result.experiences = await experiencePageWithPersonas(page, personas, {
        url,
        captureScreenshots: true,
        captureCode: captureCode,
        captureState: true
      });
    } else {
      const experience = await experiencePageAsPersona(page, {
        name: 'Game Tester',
        perspective: 'Testing gameplay experience',
        focus: ['gameplay', 'fun', 'accessibility']
      }, {
        url,
        captureScreenshots: true,
        captureCode: captureCode,
        captureState: true
      });
      result.experiences = [experience];
    }

    // Capture temporal screenshots, optionally running them through preprocessing
    if (captureTemporal) {
      const temporalScreenshots = await captureTemporalScreenshots(page, fps, duration);
      result.temporalScreenshots = temporalScreenshots;
      trackPropagation('temporal', { count: temporalScreenshots.length }, 'Captured temporal screenshots');

      if (useTemporalPreprocessing && temporalScreenshots.length > 0) {
        const { createTemporalPreprocessingManager, createAdaptiveTemporalProcessor } = await import('./temporal-preprocessor.mjs');
        const preprocessingManager = createTemporalPreprocessingManager();
        const adaptiveProcessor = createAdaptiveTemporalProcessor(preprocessingManager);

        // Turn each captured frame into a note; elapsed falls back to the
        // nominal frame spacing derived from fps.
        const notes = temporalScreenshots.map((frame, index) => ({
          timestamp: frame.timestamp,
          elapsed: frame.elapsed || index * (1000 / fps),
          screenshotPath: frame.path,
          step: `gameplay_frame_${index}`,
          observation: `Frame ${index} of gameplay`
        }));

        const processed = await adaptiveProcessor.process(notes, {
          testType: 'gameplay-temporal',
          viewport: await page.viewportSize()
        });

        result.processedTemporalNotes = processed;
        trackPropagation('temporal-preprocessing', {
          original: notes.length,
          processed: processed.length
        }, 'Processed temporal notes with adaptive preprocessing');
      }
    }

    // Every goal is judged against the same inputs: the last screenshot and the
    // aggregated notes of the first experience. Both are loop-invariant.
    const primaryExperience = result.experiences[0];
    const primaryScreenshots = primaryExperience?.screenshots;
    const lastScreenshotPath = primaryScreenshots?.[primaryScreenshots.length - 1]?.path;
    const temporalNotes = primaryExperience?.aggregated || null;

    // Evaluate with variable goals
    const goalEvaluations = [];
    for (const goal of goalArray) {
      if (!lastScreenshotPath) {
        warn('[Convenience] No screenshot available for goal evaluation');
        continue;
      }

      // Generate prompt from goal (for display/debugging, goal also used by prompt composition)
      const prompt = generateGamePrompt(goal, {
        gameState,
        renderedCode,
        stage: 'gameplay'
      });

      // Validate with goal in context (prompt composition system will use goal)
      const evaluation = await validateScreenshot(lastScreenshotPath, prompt, {
        testType: 'gameplay-goal',
        gameState,
        renderedCode,
        goal: goal, // Pass goal in context - prompt composition system will use it
        temporalNotes: temporalNotes, // Include aggregated temporal notes
        enableUncertaintyReduction: true // Enable uncertainty reduction for gameplay testing
      });

      goalEvaluations.push({
        // `?.` keeps a null/undefined goal entry from discarding all evaluations
        // (same guard the play-mode branch uses).
        goal: typeof goal === 'string' ? goal : goal?.description || 'unknown',
        evaluation,
        prompt
      });
    }

    result.evaluations = goalEvaluations;

    // Aggregate notes across all experiences for cross-experience analysis
    const allNotes = result.experiences.flatMap(exp => exp.notes || []);

    if (allNotes.length > 0) {
      const aggregated = aggregateTemporalNotes(allNotes, {
        windowSize: 5000,
        decayFactor: 0.9
      });
      result.aggregated = aggregated;

      // Multi-scale aggregation is best effort: a failure is downgraded to a
      // warning plus a placeholder so the rest of the result stays usable.
      try {
        const aggregatedMultiScale = aggregateMultiScale(allNotes, {
          attentionWeights: true
        });
        // Ensure it has the expected structure
        if (!aggregatedMultiScale.scales) {
          aggregatedMultiScale.scales = {};
        }
        if (!aggregatedMultiScale.coherence) {
          aggregatedMultiScale.coherence = {};
        }
        result.aggregatedMultiScale = aggregatedMultiScale;
      } catch (error) {
        warn(`[Convenience] Multi-scale aggregation failed: ${error.message}`);
        result.aggregatedMultiScale = emptyMultiScaleNotes('Multi-scale aggregation failed');
      }

      trackPropagation('aggregation', {
        windows: aggregated.windows.length,
        coherence: aggregated.coherence,
        scales: Object.keys(result.aggregatedMultiScale.scales || {})
      }, 'Aggregated temporal notes with multi-scale');
    } else {
      // Return empty aggregated structures if no notes (for consistency)
      result.aggregated = emptyAggregatedNotes('No notes to aggregate');
      result.aggregatedMultiScale = emptyMultiScaleNotes('No notes to aggregate');
    }

    // Record the per-experience aggregations as well
    result.experiences.forEach((exp, i) => {
      if (exp.aggregated) {
        trackPropagation('experience-aggregation', {
          experienceIndex: i,
          windows: exp.aggregated.windows.length,
          coherence: exp.aggregated.coherence
        }, `Experience ${i} has aggregated notes`);
      }
    });

    // Check cross-modal consistency (if requested); needs rendered code and at
    // least one screenshot from the first experience.
    if (checkConsistency && renderedCode && primaryScreenshots?.length > 0) {
      const consistency = checkCrossModalConsistency({
        screenshot: primaryScreenshots[primaryScreenshots.length - 1].path,
        renderedCode,
        pageState: gameState
      });
      result.consistency = consistency;
      trackPropagation('consistency', { isConsistent: consistency.isConsistent }, 'Checked cross-modal consistency');
    }

    // Get propagation history
    const { getPropagationTracker } = await import('./experience-propagation.mjs');
    result.propagation = getPropagationTracker().getHistory();
  } catch (error) {
    warn(`[Convenience] Gameplay test failed: ${error.message}`);
    result.error = error.message;

    // Ensure aggregated structures are always present even on error
    if (!result.aggregated) {
      result.aggregated = emptyAggregatedNotes('Error during aggregation');
    }
    if (!result.aggregatedMultiScale) {
      result.aggregatedMultiScale = emptyMultiScaleNotes('Error during aggregation');
    }
  }

  // Final check - ensure aggregated structures are always present
  if (!result.aggregated) {
    result.aggregated = emptyAggregatedNotes('No aggregation performed');
  }
  if (!result.aggregatedMultiScale) {
    result.aggregatedMultiScale = emptyMultiScaleNotes('No aggregation performed');
  }

  return result;
}

/** Build the placeholder stored in `aggregated` when there is no temporal aggregation result. */
function emptyAggregatedNotes(summary) {
  return { windows: [], coherence: 0, summary, timeSpan: 0 };
}

/** Build the placeholder stored in `aggregatedMultiScale` when there is no multi-scale result. */
function emptyMultiScaleNotes(summary) {
  return { scales: {}, summary, coherence: {} };
}
|
|
372
|
+
|
|
373
|
+
/**
 * Test browser experience with multiple stages
 *
 * Complete workflow for testing browser experiences across multiple stages
 * (initial, form, payment, gameplay, etc.).
 *
 * For every stage the page is experienced (by each supplied persona, or by a
 * default "Browser Tester" persona), the first screenshot of the most recent
 * experience is evaluated, and the notes of all experiences collected so far
 * are re-aggregated into `result.aggregated` / `result.aggregatedMultiScale`.
 *
 * Error handling: once the `url` check has passed, failures are caught, logged
 * with `warn`, and reported as `result.error`; stages completed before the
 * failure remain in the result.
 *
 * @param {import('playwright').Page} page - Playwright page object
 * @param {Object} options - Test options
 * @param {string} options.url - Page URL
 * @param {Array<Object>} [options.personas] - Personas to test with
 * @param {Array<string>} [options.stages] - Stages to test ('initial', 'form', 'payment', 'gameplay')
 * @param {boolean} [options.captureCode] - Extract rendered code
 * @param {boolean} [options.captureTemporal] - Accepted for API compatibility; this function does not currently read it
 * @returns {Promise<Object>} Test results: `url`, `stages` (stages completed), `experiences`,
 *   `evaluations` (`{ stage, evaluation }` entries) and, when applicable, `aggregated`,
 *   `aggregatedMultiScale` and `error`
 * @throws {ValidationError} If `options.url` is missing
 */
export async function testBrowserExperience(page, options = {}) {
  const {
    url,
    personas = null,
    stages = ['initial', 'form', 'payment'],
    captureCode = true
  } = options;

  if (!url) {
    throw new ValidationError('testBrowserExperience: url is required', { function: 'testBrowserExperience', parameter: 'url' });
  }

  log('[Convenience] Testing browser experience:', { url, stages });

  const result = {
    url,
    stages: [],
    experiences: [],
    evaluations: []
  };

  try {
    // Navigate to page
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
    await page.waitForLoadState('networkidle');

    // Test each stage
    for (const stage of stages) {
      log(`[Convenience] Testing stage: ${stage}`);

      // Extract rendered code for this stage
      let renderedCode = null;
      if (captureCode) {
        renderedCode = await extractRenderedCode(page);
      }

      // Get page state
      const pageState = await page.evaluate(() => {
        return {
          title: document.title,
          url: window.location.href,
          viewport: {
            width: window.innerWidth,
            height: window.innerHeight
          }
        };
      });

      // Experience with personas (if provided), otherwise one default tester persona
      if (personas && personas.length > 0) {
        const experiences = await experiencePageWithPersonas(page, personas, {
          url,
          captureScreenshots: true,
          captureCode: captureCode,
          captureState: true
        });
        result.experiences.push(...experiences);
      } else {
        const experience = await experiencePageAsPersona(page, {
          name: 'Browser Tester',
          perspective: `Testing ${stage} stage`,
          focus: ['usability', 'accessibility']
        }, {
          url,
          captureScreenshots: true,
          captureCode: captureCode,
          captureState: true
        });
        result.experiences.push(experience);
      }

      // Evaluate this stage using the first screenshot of the most recently
      // added experience; stages without a screenshot are not evaluated.
      const lastExperience = result.experiences[result.experiences.length - 1];
      const screenshotPath = lastExperience?.screenshots?.[0]?.path;
      if (screenshotPath) {
        const prompt = `Evaluate the ${stage} stage. Check for usability, accessibility, and user experience.`;

        // Include temporal notes from experience
        const temporalNotes = lastExperience?.aggregated || null;

        const evaluation = await validateScreenshot(screenshotPath, prompt, {
          testType: `browser-experience-${stage}`,
          renderedCode,
          pageState,
          temporalNotes: temporalNotes, // Include aggregated temporal notes
          enableUncertaintyReduction: true // Enable uncertainty reduction for comprehensive testing
        });
        result.evaluations.push({
          stage,
          evaluation
        });
      }

      result.stages.push(stage);

      // Aggregate temporal notes across all stages seen so far (overwrites the
      // aggregation from the previous stage).
      const allStageNotes = result.experiences.flatMap(exp => exp.notes || []);
      if (allStageNotes.length > 0) {
        result.aggregated = aggregateTemporalNotes(allStageNotes, {
          windowSize: 10000,
          decayFactor: 0.9
        });

        // Multi-scale aggregation is best effort, as in testGameplay: a failure
        // must not abort the remaining stages.
        try {
          result.aggregatedMultiScale = aggregateMultiScale(allStageNotes, {
            attentionWeights: true
          });
        } catch (error) {
          warn(`[Convenience] Multi-scale aggregation failed: ${error.message}`);
          result.aggregatedMultiScale = {
            scales: {},
            summary: 'Multi-scale aggregation failed',
            coherence: {}
          };
        }
      }

      // Navigate to next stage (if needed). The form selectors below are
      // hard-coded; if they do not match, only a warning is emitted.
      if (stage === 'form' && stages.includes('payment')) {
        try {
          await page.fill('#name', 'Test User');
          await page.fill('#amount', '5');
          await page.click('#continue-btn');
          await page.waitForSelector('#payment:not(.hidden)', { timeout: 10000 });
        } catch (e) {
          warn(`[Convenience] Could not navigate to payment stage: ${e.message}`);
        }
      }
    }
  } catch (error) {
    warn(`[Convenience] Browser experience test failed: ${error.message}`);
    result.error = error.message;
  }

  return result;
}
|
|
523
|
+
|
|
524
|
+
/**
 * Validate screenshot with variable goals
 *
 * Simplified API for validating screenshots with variable goals/prompts.
 * Supports string goals, goal objects, arrays, and functions.
 *
 * Originally motivated by interactive web applications
 * that require real-time validation, variable goals, and temporal understanding.
 *
 * Supports:
 * - Brutalist rubric goals
 * - Accessibility goals with contrast requirements
 * - Game state validation goals
 * - Better error messages and context
 *
 * Temporal notes are taken from `context.aggregated`, else `context.temporalNotes`,
 * else auto-aggregated from a non-empty `context.notes`. Because `context` is
 * spread last into the validation context, any key it carries (e.g. `testType`,
 * `temporalNotes`, `enableUncertaintyReduction`) overrides the defaults built here.
 *
 * @param {string} screenshotPath - Path to screenshot
 * @param {Object} options - Validation options
 * @param {string | Object | Array | Function} options.goal - Variable goal (string, object, array, or function)
 * @param {Object} [options.gameState] - Game state (if applicable)
 * @param {Object} [options.renderedCode] - Rendered code (if available)
 * @param {Object} [options.persona] - Persona (if applicable)
 * @param {Object} [options.context] - Additional context (null is treated like an empty object)
 * @returns {Promise<Object>} `{ goal, prompt, result }` where `goal` is the goal string
 *   (or its `description`, or 'unknown'), `prompt` is the generated prompt and `result`
 *   is the normalized validation result
 * @throws {ValidationError} If `screenshotPath` or `options.goal` is missing
 */
export async function validateWithGoals(screenshotPath, options = {}) {
  const {
    goal,
    gameState = null,
    renderedCode = null,
    persona = null,
    context = {}
  } = options;

  if (!screenshotPath) {
    throw new ValidationError('validateWithGoals: screenshotPath is required', { function: 'validateWithGoals', parameter: 'screenshotPath' });
  }

  if (!goal) {
    throw new ValidationError('validateWithGoals: goal is required', { function: 'validateWithGoals', parameter: 'goal' });
  }

  // The destructuring default only covers `undefined`; an explicit `context: null`
  // (the convention used by the sibling options) must not crash the lookups below.
  const extraContext = context ?? {};

  log('[Convenience] Validating with goal:', { screenshotPath, goal: typeof goal === 'string' ? goal : goal.description || 'object' });

  // Generate prompt from goal (for display/debugging; goal also used by prompt composition)
  const prompt = generateGamePrompt(goal, {
    gameState,
    renderedCode,
    persona,
    ...extraContext
  });

  // Include temporal notes if available in context
  let temporalNotes = null;
  if (extraContext.aggregated) {
    temporalNotes = extraContext.aggregated;
  } else if (extraContext.temporalNotes) {
    temporalNotes = extraContext.temporalNotes;
  } else if (extraContext.notes && extraContext.notes.length > 0) {
    // Auto-aggregate if notes provided but not aggregated; failure is non-fatal.
    try {
      temporalNotes = aggregateTemporalNotes(extraContext.notes, {
        windowSize: TEMPORAL_CONSTANTS.DEFAULT_WINDOW_SIZE_MS,
        decayFactor: TEMPORAL_CONSTANTS.DEFAULT_DECAY_FACTOR
      });
    } catch (error) {
      warn('[Convenience] Auto-aggregation failed, continuing without temporal notes:', error.message);
      temporalNotes = null;
    }
  }

  // Validate with goal in context (prompt composition system will use goal).
  // Context is spread last so callers can override the defaults.
  const validationContext = {
    testType: 'goal-validation',
    gameState,
    renderedCode,
    goal: goal, // Pass goal in context - prompt composition system will use it
    temporalNotes: temporalNotes, // Include aggregated temporal notes
    ...extraContext // Allow context to override defaults (e.g., testType, enableUncertaintyReduction)
  };

  const result = await validateScreenshot(screenshotPath, prompt, validationContext);

  // Normalize result structure (ensures consistent return type)
  const normalizedResult = normalizeValidationResult(result, 'validateWithGoals');

  return {
    goal: typeof goal === 'string' ? goal : goal.description || 'unknown',
    prompt,
    result: normalizedResult
  };
}
|
|
617
|
+
|