@arclabs561/ai-visual-test 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/.secretsignore.example +20 -0
  2. package/CHANGELOG.md +360 -0
  3. package/CONTRIBUTING.md +63 -0
  4. package/DEPLOYMENT.md +80 -0
  5. package/LICENSE +22 -0
  6. package/README.md +142 -0
  7. package/SECURITY.md +108 -0
  8. package/api/health.js +34 -0
  9. package/api/validate.js +252 -0
  10. package/index.d.ts +1221 -0
  11. package/package.json +112 -0
  12. package/public/index.html +149 -0
  13. package/src/batch-optimizer.mjs +451 -0
  14. package/src/bias-detector.mjs +370 -0
  15. package/src/bias-mitigation.mjs +233 -0
  16. package/src/cache.mjs +433 -0
  17. package/src/config.mjs +268 -0
  18. package/src/constants.mjs +80 -0
  19. package/src/context-compressor.mjs +350 -0
  20. package/src/convenience.mjs +617 -0
  21. package/src/cost-tracker.mjs +257 -0
  22. package/src/cross-modal-consistency.mjs +170 -0
  23. package/src/data-extractor.mjs +232 -0
  24. package/src/dynamic-few-shot.mjs +140 -0
  25. package/src/dynamic-prompts.mjs +361 -0
  26. package/src/ensemble/index.mjs +53 -0
  27. package/src/ensemble-judge.mjs +366 -0
  28. package/src/error-handler.mjs +67 -0
  29. package/src/errors.mjs +167 -0
  30. package/src/experience-propagation.mjs +128 -0
  31. package/src/experience-tracer.mjs +487 -0
  32. package/src/explanation-manager.mjs +299 -0
  33. package/src/feedback-aggregator.mjs +248 -0
  34. package/src/game-goal-prompts.mjs +478 -0
  35. package/src/game-player.mjs +548 -0
  36. package/src/hallucination-detector.mjs +155 -0
  37. package/src/helpers/playwright.mjs +80 -0
  38. package/src/human-validation-manager.mjs +516 -0
  39. package/src/index.mjs +364 -0
  40. package/src/judge.mjs +929 -0
  41. package/src/latency-aware-batch-optimizer.mjs +192 -0
  42. package/src/load-env.mjs +159 -0
  43. package/src/logger.mjs +55 -0
  44. package/src/metrics.mjs +187 -0
  45. package/src/model-tier-selector.mjs +221 -0
  46. package/src/multi-modal/index.mjs +36 -0
  47. package/src/multi-modal-fusion.mjs +190 -0
  48. package/src/multi-modal.mjs +524 -0
  49. package/src/natural-language-specs.mjs +1071 -0
  50. package/src/pair-comparison.mjs +277 -0
  51. package/src/persona/index.mjs +42 -0
  52. package/src/persona-enhanced.mjs +200 -0
  53. package/src/persona-experience.mjs +572 -0
  54. package/src/position-counterbalance.mjs +140 -0
  55. package/src/prompt-composer.mjs +375 -0
  56. package/src/render-change-detector.mjs +583 -0
  57. package/src/research-enhanced-validation.mjs +436 -0
  58. package/src/retry.mjs +152 -0
  59. package/src/rubrics.mjs +231 -0
  60. package/src/score-tracker.mjs +277 -0
  61. package/src/smart-validator.mjs +447 -0
  62. package/src/spec-config.mjs +106 -0
  63. package/src/spec-templates.mjs +347 -0
  64. package/src/specs/index.mjs +38 -0
  65. package/src/temporal/index.mjs +102 -0
  66. package/src/temporal-adaptive.mjs +163 -0
  67. package/src/temporal-batch-optimizer.mjs +222 -0
  68. package/src/temporal-constants.mjs +69 -0
  69. package/src/temporal-context.mjs +49 -0
  70. package/src/temporal-decision-manager.mjs +271 -0
  71. package/src/temporal-decision.mjs +669 -0
  72. package/src/temporal-errors.mjs +58 -0
  73. package/src/temporal-note-pruner.mjs +173 -0
  74. package/src/temporal-preprocessor.mjs +543 -0
  75. package/src/temporal-prompt-formatter.mjs +219 -0
  76. package/src/temporal-validation.mjs +159 -0
  77. package/src/temporal.mjs +415 -0
  78. package/src/type-guards.mjs +311 -0
  79. package/src/uncertainty-reducer.mjs +470 -0
  80. package/src/utils/index.mjs +175 -0
  81. package/src/validation-framework.mjs +321 -0
  82. package/src/validation-result-normalizer.mjs +64 -0
  83. package/src/validation.mjs +243 -0
  84. package/src/validators/accessibility-programmatic.mjs +345 -0
  85. package/src/validators/accessibility-validator.mjs +223 -0
  86. package/src/validators/batch-validator.mjs +143 -0
  87. package/src/validators/hybrid-validator.mjs +268 -0
  88. package/src/validators/index.mjs +34 -0
  89. package/src/validators/prompt-builder.mjs +218 -0
  90. package/src/validators/rubric.mjs +85 -0
  91. package/src/validators/state-programmatic.mjs +260 -0
  92. package/src/validators/state-validator.mjs +291 -0
  93. package/vercel.json +27 -0
@@ -0,0 +1,617 @@
1
+ /**
2
+ * High-Level Convenience Functions
3
+ *
4
+ * Provides simplified APIs for common workflows, reducing boilerplate
5
+ * and making the library easier to use for common patterns.
6
+ *
7
+ * Based on common visual testing workflows and usage patterns.
8
+ */
9
+
10
+ import { validateScreenshot } from './judge.mjs';
11
+ import { normalizeValidationResult } from './validation-result-normalizer.mjs';
12
+ import { experiencePageAsPersona, experiencePageWithPersonas } from './persona-experience.mjs';
13
+ import { extractRenderedCode, captureTemporalScreenshots, multiPerspectiveEvaluation } from './multi-modal.mjs';
14
+ import { aggregateTemporalNotes, formatNotesForPrompt } from './temporal.mjs';
15
+ import { aggregateMultiScale } from './temporal-decision.mjs';
16
+ import { generateGamePrompt, createGameGoal, createGameGoals } from './game-goal-prompts.mjs';
17
+ import { checkCrossModalConsistency, validateExperienceConsistency } from './cross-modal-consistency.mjs';
18
+ import { trackPropagation } from './experience-propagation.mjs';
19
+ import { ValidationError } from './errors.mjs';
20
+ import { log, warn } from './logger.mjs';
21
+ import { TEMPORAL_CONSTANTS } from './constants.mjs';
22
+
23
/**
 * Test gameplay with variable goals
 *
 * Complete workflow for testing games with variable goals/prompts.
 * Originally motivated by interactive web applications that require
 * real-time validation, variable goals, and temporal understanding.
 *
 * Handles persona experience, temporal capture, goal evaluation, and consistency checks.
 *
 * Supports interactive games:
 * - Games that activate from payment screens (not just standalone games)
 * - Game activation via keyboard shortcuts (e.g., 'g' key)
 * - Game state extraction (window.gameState)
 * - Temporal preprocessing for better performance
 *
 * Error handling: once navigation starts, failures are not thrown. They are
 * logged, recorded as `result.error`, and whatever was collected up to that
 * point (experiences, completed goal evaluations, ...) is still returned.
 * `result.aggregated` and `result.aggregatedMultiScale` are always present.
 *
 * Play mode: when `options.play` is true the call is delegated to playGame()
 * before the `url` check runs, and playGame()'s result is returned as-is.
 *
 * @param {import('playwright').Page} page - Playwright page object
 * @param {Object} options - Test options
 * @param {string} options.url - Game URL (or page URL if game activates from page)
 * @param {string | Object | Array | Function} [options.goals] - Variable goals (string, object, array, or function)
 * @param {Array<Object>} [options.personas] - Personas to test with
 * @param {boolean} [options.captureTemporal] - Capture temporal screenshots
 * @param {number} [options.fps] - FPS for temporal capture
 * @param {number} [options.duration] - Duration for temporal capture (ms)
 * @param {boolean} [options.captureCode] - Extract rendered code
 * @param {boolean} [options.checkConsistency] - Check cross-modal consistency
 * @param {string} [options.gameActivationKey] - Keyboard key to activate game (e.g., 'g')
 * @param {string} [options.gameSelector] - Selector to wait for game activation (e.g., '#game-paddle')
 * @param {boolean} [options.useTemporalPreprocessing] - Use temporal preprocessing for better performance
 * @param {boolean} [options.play] - If true, actually play the game (uses playGame() internally)
 * @param {number} [options.maxSteps] - Play mode only: step limit handed to playGame() (falls back to 100)
 * @returns {Promise<Object>} Test results
 * @throws {ValidationError} If `options.url` is missing (not checked in play mode)
 */
export async function testGameplay(page, options = {}) {
  const {
    url,
    goals = ['fun', 'accessibility', 'performance'],
    personas = null,
    captureTemporal = false,
    fps = 2,
    duration = 5000,
    captureCode = true,
    checkConsistency = true,
    gameActivationKey = null, // e.g., 'g' to activate game
    gameSelector = null, // e.g., '#game-paddle' selector
    useTemporalPreprocessing = false,
    play = false
  } = options;

  // A single goal (string/object/function) is treated as a one-element list.
  const goalList = Array.isArray(goals) ? goals : [goals];

  // Play mode: hand over to playGame() using the first goal only.
  if (play) {
    const { playGame } = await import('./game-player.mjs');
    const firstGoal = goalList[0];
    const goalString = typeof firstGoal === 'string'
      ? firstGoal
      : firstGoal?.description || 'Play the game well';

    return await playGame(page, {
      goal: goalString,
      maxSteps: options.maxSteps || 100,
      fps: options.fps || 2,
      gameActivationKey,
      gameSelector,
      url
    });
  }

  if (!url) {
    throw new ValidationError('testGameplay: url is required', { function: 'testGameplay', parameter: 'url' });
  }

  log('[Convenience] Testing gameplay:', { url, goals, gameActivationKey, gameSelector });

  const result = {
    url,
    goals: goalList,
    experiences: [],
    evaluations: [],
    aggregated: null,
    consistency: null,
    propagation: []
  };

  try {
    // Navigate to game/page
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
    await page.waitForLoadState('networkidle');

    // Activate game if needed (for games that activate from other screens)
    if (gameActivationKey) {
      log(`[Convenience] Activating game with key: ${gameActivationKey}`);
      await page.keyboard.press(gameActivationKey);

      // Best effort: a missing selector is only warned about, the test goes on.
      if (gameSelector) {
        try {
          await page.waitForSelector(gameSelector, { timeout: 5000 });
        } catch {
          warn(`[Convenience] Game selector ${gameSelector} not found after activation`);
        }
      }

      // Wait a bit for game to initialize
      await page.waitForTimeout(500);
    }

    // Extract rendered code
    let renderedCode = null;
    if (captureCode) {
      renderedCode = await extractRenderedCode(page);
      trackPropagation('capture', { renderedCode }, 'Captured HTML/CSS for gameplay test');
    }

    // Extract game state. Defaults come first and window.gameState is spread
    // last, so every property the page actually defines wins over its default.
    const gameState = await page.evaluate(() => {
      const state = window.gameState || {};
      return {
        gameActive: state.gameActive || false,
        score: state.score || 0,
        level: state.level || 0,
        lives: state.lives || 3,
        ball: state.ball || null,
        paddle: state.paddle || null,
        bricks: state.bricks || null,
        // Include any other game state properties
        ...state
      };
    });

    // Experience the page: once per persona, or once as a generic tester.
    const experienceOptions = {
      url,
      captureScreenshots: true,
      captureCode: captureCode,
      captureState: true
    };
    if (personas && personas.length > 0) {
      result.experiences = await experiencePageWithPersonas(page, personas, experienceOptions);
    } else {
      const experience = await experiencePageAsPersona(page, {
        name: 'Game Tester',
        perspective: 'Testing gameplay experience',
        focus: ['gameplay', 'fun', 'accessibility']
      }, experienceOptions);
      result.experiences = [experience];
    }

    // Capture temporal screenshots, optionally running them through preprocessing
    if (captureTemporal) {
      const temporalScreenshots = await captureTemporalScreenshots(page, fps, duration);
      result.temporalScreenshots = temporalScreenshots;
      trackPropagation('temporal', { count: temporalScreenshots.length }, 'Captured temporal screenshots');

      if (useTemporalPreprocessing && temporalScreenshots.length > 0) {
        const { createTemporalPreprocessingManager, createAdaptiveTemporalProcessor } = await import('./temporal-preprocessor.mjs');
        const preprocessingManager = createTemporalPreprocessingManager();
        const adaptiveProcessor = createAdaptiveTemporalProcessor(preprocessingManager);

        // Turn each captured frame into a note; when a frame carries no
        // elapsed time it is estimated from its index and the capture fps.
        const notes = temporalScreenshots.map((frame, index) => ({
          timestamp: frame.timestamp,
          elapsed: frame.elapsed || index * (1000 / fps),
          screenshotPath: frame.path,
          step: `gameplay_frame_${index}`,
          observation: `Frame ${index} of gameplay`
        }));

        const processed = await adaptiveProcessor.process(notes, {
          testType: 'gameplay-temporal',
          viewport: await page.viewportSize()
        });

        result.processedTemporalNotes = processed;
        trackPropagation('temporal-preprocessing', {
          original: notes.length,
          processed: processed.length
        }, 'Processed temporal notes with adaptive preprocessing');
      }
    }

    // Evaluate every goal against the last screenshot of the first experience.
    // Both lookups are the same for every goal, so they are done once up front.
    const primaryExperience = result.experiences[0];
    const primaryScreenshots = primaryExperience?.screenshots;
    const lastScreenshotPath = primaryScreenshots?.[primaryScreenshots.length - 1]?.path;
    const temporalNotes = primaryExperience?.aggregated || null;

    for (const goal of goalList) {
      if (!lastScreenshotPath) {
        warn('[Convenience] No screenshot available for goal evaluation');
        continue;
      }

      // Generate prompt from goal (for display/debugging, goal also used by prompt composition)
      const prompt = generateGamePrompt(goal, {
        gameState,
        renderedCode,
        stage: 'gameplay'
      });

      // Validate with goal in context (prompt composition system will use goal)
      const evaluation = await validateScreenshot(lastScreenshotPath, prompt, {
        testType: 'gameplay-goal',
        gameState,
        renderedCode,
        goal: goal, // Pass goal in context - prompt composition system will use it
        temporalNotes: temporalNotes, // Include aggregated temporal notes
        enableUncertaintyReduction: true // Enable uncertainty reduction for gameplay testing
      });

      // Record each evaluation as soon as it finishes so that a failure on a
      // later goal does not throw away the ones that already succeeded.
      result.evaluations.push({
        goal: typeof goal === 'string' ? goal : goal?.description || 'unknown',
        evaluation,
        prompt
      });
    }

    // Aggregate notes across all experiences for cross-experience analysis
    const allNotes = result.experiences.flatMap((exp) => exp.notes || []);

    if (allNotes.length > 0) {
      const aggregated = aggregateTemporalNotes(allNotes, {
        windowSize: 5000,
        decayFactor: 0.9
      });
      result.aggregated = aggregated;

      // Multi-scale aggregation is an optional enrichment: if it fails we warn
      // and fall back to an empty structure rather than failing the whole run.
      try {
        const aggregatedMultiScale = aggregateMultiScale(allNotes, {
          attentionWeights: true
        });
        // Ensure it has the expected structure
        if (!aggregatedMultiScale.scales) {
          aggregatedMultiScale.scales = {};
        }
        if (!aggregatedMultiScale.coherence) {
          aggregatedMultiScale.coherence = {};
        }
        result.aggregatedMultiScale = aggregatedMultiScale;
      } catch (error) {
        warn(`[Convenience] Multi-scale aggregation failed: ${error.message}`);
        result.aggregatedMultiScale = emptyMultiScaleAggregation('Multi-scale aggregation failed');
      }

      trackPropagation('aggregation', {
        windows: aggregated.windows.length,
        coherence: aggregated.coherence,
        scales: Object.keys(result.aggregatedMultiScale.scales || {})
      }, 'Aggregated temporal notes with multi-scale');
    } else {
      // Return empty aggregated structures if no notes (for consistency)
      result.aggregated = emptyAggregation('No notes to aggregate');
      result.aggregatedMultiScale = emptyMultiScaleAggregation('No notes to aggregate');
    }

    // Record the per-experience aggregations as well
    result.experiences.forEach((exp, i) => {
      if (exp.aggregated) {
        trackPropagation('experience-aggregation', {
          experienceIndex: i,
          windows: exp.aggregated.windows.length,
          coherence: exp.aggregated.coherence
        }, `Experience ${i} has aggregated notes`);
      }
    });

    // Check cross-modal consistency (needs rendered code and a screenshot)
    if (checkConsistency && renderedCode && lastScreenshotPath !== undefined && primaryScreenshots.length > 0) {
      const consistency = checkCrossModalConsistency({
        screenshot: lastScreenshotPath,
        renderedCode,
        pageState: gameState
      });
      result.consistency = consistency;
      trackPropagation('consistency', { isConsistent: consistency.isConsistent }, 'Checked cross-modal consistency');
    } else if (checkConsistency && renderedCode && primaryScreenshots?.length > 0) {
      // Last screenshot exists but carries no path: still run the check with
      // whatever the screenshot entry provides.
      const consistency = checkCrossModalConsistency({
        screenshot: primaryScreenshots[primaryScreenshots.length - 1].path,
        renderedCode,
        pageState: gameState
      });
      result.consistency = consistency;
      trackPropagation('consistency', { isConsistent: consistency.isConsistent }, 'Checked cross-modal consistency');
    }

    // Get propagation history
    const { getPropagationTracker } = await import('./experience-propagation.mjs');
    result.propagation = getPropagationTracker().getHistory();

  } catch (error) {
    warn(`[Convenience] Gameplay test failed: ${error.message}`);
    result.error = error.message;

    // Ensure aggregated structures are always present even on error.
    // (On the success path both fields are always set before this point.)
    if (!result.aggregated) {
      result.aggregated = emptyAggregation('Error during aggregation');
    }
    if (!result.aggregatedMultiScale) {
      result.aggregatedMultiScale = emptyMultiScaleAggregation('Error during aggregation');
    }
  }

  return result;
}

/**
 * Build the empty stand-in used for `result.aggregated` in testGameplay.
 *
 * @param {string} summary - Human-readable reason why nothing was aggregated
 * @returns {{windows: Array, coherence: number, summary: string, timeSpan: number}}
 */
function emptyAggregation(summary) {
  return {
    windows: [],
    coherence: 0,
    summary,
    timeSpan: 0
  };
}

/**
 * Build the empty stand-in used for `result.aggregatedMultiScale` in testGameplay.
 *
 * @param {string} summary - Human-readable reason why nothing was aggregated
 * @returns {{scales: Object, summary: string, coherence: Object}}
 */
function emptyMultiScaleAggregation(summary) {
  return {
    scales: {},
    summary,
    coherence: {}
  };
}
372
+
373
/**
 * Test browser experience with multiple stages
 *
 * Complete workflow for testing browser experiences across multiple stages
 * (initial, form, payment, gameplay, etc.).
 *
 * For every stage the page is experienced (per persona, or once as a generic
 * "Browser Tester"), the first screenshot of the most recently added
 * experience is evaluated, and the notes of all experiences collected so far
 * are re-aggregated into `result.aggregated` / `result.aggregatedMultiScale`.
 * Those two fields are only set once at least one note exists.
 *
 * The only built-in stage transition is 'form' -> 'payment' (when 'payment'
 * is among the requested stages); it fills `#name` and `#amount`, clicks
 * `#continue-btn` and waits for `#payment:not(.hidden)`. A failed transition
 * is logged and the remaining stages still run.
 *
 * Error handling: once navigation starts, failures are not thrown. They are
 * logged, stored as `result.error`, and the partial result is returned.
 *
 * @param {import('playwright').Page} page - Playwright page object
 * @param {Object} options - Test options
 * @param {string} options.url - Page URL
 * @param {Array<Object>} [options.personas] - Personas to test with
 * @param {Array<string>} [options.stages] - Stages to test ('initial', 'form', 'payment', 'gameplay')
 * @param {boolean} [options.captureCode] - Extract rendered code
 * @param {boolean} [options.captureTemporal] - Accepted for API compatibility; currently has no effect in this function
 * @returns {Promise<Object>} Test results
 * @throws {ValidationError} If `options.url` is missing
 */
export async function testBrowserExperience(page, options = {}) {
  const {
    url,
    personas = null,
    stages = ['initial', 'form', 'payment'],
    captureCode = true
  } = options;

  if (!url) {
    throw new ValidationError('testBrowserExperience: url is required', { function: 'testBrowserExperience', parameter: 'url' });
  }

  log('[Convenience] Testing browser experience:', { url, stages });

  const result = {
    url,
    stages: [],
    experiences: [],
    evaluations: []
  };

  const experienceOptions = {
    url,
    captureScreenshots: true,
    captureCode: captureCode,
    captureState: true
  };

  try {
    // Navigate to page
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
    await page.waitForLoadState('networkidle');

    // Test each stage
    for (const stage of stages) {
      log(`[Convenience] Testing stage: ${stage}`);

      // Extract rendered code for this stage
      let renderedCode = null;
      if (captureCode) {
        renderedCode = await extractRenderedCode(page);
      }

      // Get page state
      const pageState = await page.evaluate(() => {
        return {
          title: document.title,
          url: window.location.href,
          viewport: {
            width: window.innerWidth,
            height: window.innerHeight
          }
        };
      });

      // Experience with personas (if provided), otherwise a single generic one
      if (personas && personas.length > 0) {
        const experiences = await experiencePageWithPersonas(page, personas, experienceOptions);
        result.experiences.push(...experiences);
      } else {
        const experience = await experiencePageAsPersona(page, {
          name: 'Browser Tester',
          perspective: `Testing ${stage} stage`,
          focus: ['usability', 'accessibility']
        }, experienceOptions);
        result.experiences.push(experience);
      }

      // Evaluate this stage using the most recently added experience
      const lastExperience = result.experiences[result.experiences.length - 1];
      const screenshotPath = lastExperience?.screenshots?.[0]?.path;
      if (screenshotPath) {
        const prompt = `Evaluate the ${stage} stage. Check for usability, accessibility, and user experience.`;

        // Include temporal notes from experience
        const temporalNotes = lastExperience?.aggregated || null;

        const evaluation = await validateScreenshot(screenshotPath, prompt, {
          testType: `browser-experience-${stage}`,
          renderedCode,
          pageState,
          temporalNotes: temporalNotes, // Include aggregated temporal notes
          enableUncertaintyReduction: true // Enable uncertainty reduction for comprehensive testing
        });
        result.evaluations.push({
          stage,
          evaluation
        });
      }

      result.stages.push(stage);

      // Aggregate temporal notes across all stages seen so far. Doing this per
      // stage keeps the aggregate up to date even if a later stage fails.
      const allStageNotes = result.experiences.flatMap((exp) => exp.notes || []);
      if (allStageNotes.length > 0) {
        result.aggregated = aggregateTemporalNotes(allStageNotes, {
          windowSize: 10000,
          decayFactor: 0.9
        });

        // Multi-scale aggregation is an optional enrichment: as in
        // testGameplay, a failure here is logged and replaced by an empty
        // structure instead of aborting the remaining stages.
        try {
          result.aggregatedMultiScale = aggregateMultiScale(allStageNotes, {
            attentionWeights: true
          });
        } catch (error) {
          warn(`[Convenience] Multi-scale aggregation failed: ${error.message}`);
          result.aggregatedMultiScale = {
            scales: {},
            summary: 'Multi-scale aggregation failed',
            coherence: {}
          };
        }
      }

      // Navigate to next stage (if needed)
      if (stage === 'form' && stages.includes('payment')) {
        // Fill form to get to payment
        try {
          await page.fill('#name', 'Test User');
          await page.fill('#amount', '5');
          await page.click('#continue-btn');
          await page.waitForSelector('#payment:not(.hidden)', { timeout: 10000 });
        } catch (e) {
          warn(`[Convenience] Could not navigate to payment stage: ${e.message}`);
        }
      }
    }

  } catch (error) {
    warn(`[Convenience] Browser experience test failed: ${error.message}`);
    result.error = error.message;
  }

  return result;
}
523
+
524
/**
 * Validate screenshot with variable goals
 *
 * Simplified API for validating screenshots with variable goals/prompts.
 * Supports string goals, goal objects, arrays, and functions.
 *
 * Originally motivated by interactive web applications
 * that require real-time validation, variable goals, and temporal understanding.
 *
 * Supports:
 * - Brutalist rubric goals
 * - Accessibility goals with contrast requirements
 * - Game state validation goals
 * - Better error messages and context
 *
 * Temporal notes passed to the validator are resolved from `options.context`
 * in this order: `context.aggregated`, then `context.temporalNotes`, then an
 * on-the-fly aggregation of a non-empty `context.notes`; otherwise `null`.
 * If the on-the-fly aggregation throws, a warning is logged and validation
 * continues without temporal notes.
 *
 * All other keys of `options.context` are merged into the validation context
 * and may override the defaults set here (e.g. `testType`).
 *
 * @param {string} screenshotPath - Path to screenshot
 * @param {Object} options - Validation options
 * @param {string | Object | Array | Function} options.goal - Variable goal (string, object, array, or function)
 * @param {Object} [options.gameState] - Game state (if applicable)
 * @param {Object} [options.renderedCode] - Rendered code (if available)
 * @param {Object} [options.persona] - Persona (if applicable)
 * @param {Object} [options.context] - Additional context (null is treated like an empty object)
 * @returns {Promise<Object>} `{ goal, prompt, result }` where `goal` is the goal string (or its
 *   `description`, or 'unknown'), `prompt` is the generated prompt and `result` is the normalized
 *   validation result
 * @throws {ValidationError} If `screenshotPath` or `options.goal` is missing
 */
export async function validateWithGoals(screenshotPath, options = {}) {
  const {
    goal,
    gameState = null,
    renderedCode = null,
    persona = null
  } = options;
  // A destructuring default would only cover `undefined`; an explicit
  // `context: null` must not crash the property reads below.
  const context = options.context ?? {};

  if (!screenshotPath) {
    throw new ValidationError('validateWithGoals: screenshotPath is required', { function: 'validateWithGoals', parameter: 'screenshotPath' });
  }

  if (!goal) {
    throw new ValidationError('validateWithGoals: goal is required', { function: 'validateWithGoals', parameter: 'goal' });
  }

  log('[Convenience] Validating with goal:', { screenshotPath, goal: typeof goal === 'string' ? goal : goal.description || 'object' });

  // Generate prompt from goal (for display/debugging; goal also used by prompt composition)
  const prompt = generateGamePrompt(goal, {
    gameState,
    renderedCode,
    persona,
    ...context
  });

  // Resolve temporal notes: pre-aggregated > explicit temporalNotes > raw notes
  let temporalNotes = null;
  if (context.aggregated) {
    temporalNotes = context.aggregated;
  } else if (context.temporalNotes) {
    temporalNotes = context.temporalNotes;
  } else if (context.notes && context.notes.length > 0) {
    // Auto-aggregate if notes provided but not aggregated
    try {
      temporalNotes = aggregateTemporalNotes(context.notes, {
        windowSize: TEMPORAL_CONSTANTS.DEFAULT_WINDOW_SIZE_MS,
        decayFactor: TEMPORAL_CONSTANTS.DEFAULT_DECAY_FACTOR
      });
    } catch (error) {
      warn('[Convenience] Auto-aggregation failed, continuing without temporal notes:', error.message);
      temporalNotes = null;
    }
  }

  // Validate with goal in context (prompt composition system will use goal).
  const validationContext = {
    testType: 'goal-validation',
    gameState,
    renderedCode,
    goal: goal, // Pass goal in context - prompt composition system will use it
    ...context, // Allow context to override defaults (e.g., testType, enableUncertaintyReduction)
    // Written after the spread on purpose: the value resolved above already
    // comes from `context`, and a raw `context.temporalNotes` must not undo
    // the priority order (e.g. when `context.aggregated` is also present).
    temporalNotes: temporalNotes
  };

  const result = await validateScreenshot(screenshotPath, prompt, validationContext);

  // Normalize result structure (ensures consistent return type)
  const normalizedResult = normalizeValidationResult(result, 'validateWithGoals');

  return {
    goal: typeof goal === 'string' ? goal : goal.description || 'unknown',
    prompt,
    result: normalizedResult
  };
}
617
+