@arclabs561/ai-visual-test 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93):
  1. package/.secretsignore.example +20 -0
  2. package/CHANGELOG.md +360 -0
  3. package/CONTRIBUTING.md +63 -0
  4. package/DEPLOYMENT.md +80 -0
  5. package/LICENSE +22 -0
  6. package/README.md +142 -0
  7. package/SECURITY.md +108 -0
  8. package/api/health.js +34 -0
  9. package/api/validate.js +252 -0
  10. package/index.d.ts +1221 -0
  11. package/package.json +112 -0
  12. package/public/index.html +149 -0
  13. package/src/batch-optimizer.mjs +451 -0
  14. package/src/bias-detector.mjs +370 -0
  15. package/src/bias-mitigation.mjs +233 -0
  16. package/src/cache.mjs +433 -0
  17. package/src/config.mjs +268 -0
  18. package/src/constants.mjs +80 -0
  19. package/src/context-compressor.mjs +350 -0
  20. package/src/convenience.mjs +617 -0
  21. package/src/cost-tracker.mjs +257 -0
  22. package/src/cross-modal-consistency.mjs +170 -0
  23. package/src/data-extractor.mjs +232 -0
  24. package/src/dynamic-few-shot.mjs +140 -0
  25. package/src/dynamic-prompts.mjs +361 -0
  26. package/src/ensemble/index.mjs +53 -0
  27. package/src/ensemble-judge.mjs +366 -0
  28. package/src/error-handler.mjs +67 -0
  29. package/src/errors.mjs +167 -0
  30. package/src/experience-propagation.mjs +128 -0
  31. package/src/experience-tracer.mjs +487 -0
  32. package/src/explanation-manager.mjs +299 -0
  33. package/src/feedback-aggregator.mjs +248 -0
  34. package/src/game-goal-prompts.mjs +478 -0
  35. package/src/game-player.mjs +548 -0
  36. package/src/hallucination-detector.mjs +155 -0
  37. package/src/helpers/playwright.mjs +80 -0
  38. package/src/human-validation-manager.mjs +516 -0
  39. package/src/index.mjs +364 -0
  40. package/src/judge.mjs +929 -0
  41. package/src/latency-aware-batch-optimizer.mjs +192 -0
  42. package/src/load-env.mjs +159 -0
  43. package/src/logger.mjs +55 -0
  44. package/src/metrics.mjs +187 -0
  45. package/src/model-tier-selector.mjs +221 -0
  46. package/src/multi-modal/index.mjs +36 -0
  47. package/src/multi-modal-fusion.mjs +190 -0
  48. package/src/multi-modal.mjs +524 -0
  49. package/src/natural-language-specs.mjs +1071 -0
  50. package/src/pair-comparison.mjs +277 -0
  51. package/src/persona/index.mjs +42 -0
  52. package/src/persona-enhanced.mjs +200 -0
  53. package/src/persona-experience.mjs +572 -0
  54. package/src/position-counterbalance.mjs +140 -0
  55. package/src/prompt-composer.mjs +375 -0
  56. package/src/render-change-detector.mjs +583 -0
  57. package/src/research-enhanced-validation.mjs +436 -0
  58. package/src/retry.mjs +152 -0
  59. package/src/rubrics.mjs +231 -0
  60. package/src/score-tracker.mjs +277 -0
  61. package/src/smart-validator.mjs +447 -0
  62. package/src/spec-config.mjs +106 -0
  63. package/src/spec-templates.mjs +347 -0
  64. package/src/specs/index.mjs +38 -0
  65. package/src/temporal/index.mjs +102 -0
  66. package/src/temporal-adaptive.mjs +163 -0
  67. package/src/temporal-batch-optimizer.mjs +222 -0
  68. package/src/temporal-constants.mjs +69 -0
  69. package/src/temporal-context.mjs +49 -0
  70. package/src/temporal-decision-manager.mjs +271 -0
  71. package/src/temporal-decision.mjs +669 -0
  72. package/src/temporal-errors.mjs +58 -0
  73. package/src/temporal-note-pruner.mjs +173 -0
  74. package/src/temporal-preprocessor.mjs +543 -0
  75. package/src/temporal-prompt-formatter.mjs +219 -0
  76. package/src/temporal-validation.mjs +159 -0
  77. package/src/temporal.mjs +415 -0
  78. package/src/type-guards.mjs +311 -0
  79. package/src/uncertainty-reducer.mjs +470 -0
  80. package/src/utils/index.mjs +175 -0
  81. package/src/validation-framework.mjs +321 -0
  82. package/src/validation-result-normalizer.mjs +64 -0
  83. package/src/validation.mjs +243 -0
  84. package/src/validators/accessibility-programmatic.mjs +345 -0
  85. package/src/validators/accessibility-validator.mjs +223 -0
  86. package/src/validators/batch-validator.mjs +143 -0
  87. package/src/validators/hybrid-validator.mjs +268 -0
  88. package/src/validators/index.mjs +34 -0
  89. package/src/validators/prompt-builder.mjs +218 -0
  90. package/src/validators/rubric.mjs +85 -0
  91. package/src/validators/state-programmatic.mjs +260 -0
  92. package/src/validators/state-validator.mjs +291 -0
  93. package/vercel.json +27 -0
@@ -0,0 +1,478 @@
1
+ /**
2
+ * Variable Goal/Prompt System for Games
3
+ *
4
+ * Allows games to accept variable prompts and goals, not just hardcoded game states.
5
+ *
6
+ * Research:
7
+ * - "Goal-Conditioned Reinforcement Learning: Problems and Solutions" (arXiv:2201.08299)
8
+ * - Comprehensive survey of goal-conditioned RL
9
+ * - "Accelerating Goal-Conditioned RL Algorithms and Research" (arXiv:2408.11052)
10
+ * - Fast RL benchmarks, flexible tasks, multiple goals
11
+ * - "OGBench: Benchmarking Offline Goal-Conditioned RL" (arXiv:2410.20092)
12
+ * - Benchmark with dynamic goal-based evaluation
13
+ * - "Test-Time Graph Search for Goal-Conditioned Control" (arXiv:2510.07257)
14
+ * - Adaptive, context-aware goal fulfillment
15
+ *
16
+ * This implementation provides flexible goal specification for game evaluation, supporting
17
+ * multiple goal formats (string, object, array, function) and context-aware generation.
18
+ */
19
+
20
+ import { log, warn } from './logger.mjs';
21
+
/**
 * Generate a game evaluation prompt from a variable goal/prompt specification.
 *
 * Dispatches on the runtime type of `goalOrPrompt`:
 * 1. String   - prompt text, handed to buildPromptFromString
 * 2. Function - called with the context; whatever it returns is returned as-is
 * 3. Array    - list of goals, handed to buildPromptFromGoals
 * 4. Object   - single structured goal, handed to buildPromptFromGoal
 * Any other value (null, undefined, number, boolean, ...) logs a warning and
 * falls back to buildDefaultGameplayPrompt.
 *
 * The context is forwarded untouched to whichever builder is selected; this
 * function does not read any of its fields itself.
 *
 * @param {string | Object | Array | Function} goalOrPrompt - Goal or prompt specification
 * @param {Object} [context={}] - Game context (null is treated like an empty context)
 * @param {Object} [context.gameState] - Current game state
 * @param {Object} [context.previousState] - Previous game state
 * @param {Object} [context.renderedCode] - Rendered code (HTML/CSS)
 * @param {Object} [context.persona] - Persona configuration
 * @param {string} [context.stage] - Current stage
 * @returns {string} Generated prompt (for a function goal: that function's return value)
 */
export function generateGamePrompt(goalOrPrompt, context = {}) {
  // A default parameter only covers `undefined`. An explicit `null` context
  // would otherwise crash as soon as a builder destructures it.
  const ctx = context == null ? {} : context;

  if (typeof goalOrPrompt === 'string') {
    // Direct prompt string
    return buildPromptFromString(goalOrPrompt, ctx);
  }

  if (typeof goalOrPrompt === 'function') {
    // Caller-supplied generator
    return goalOrPrompt(ctx);
  }

  if (Array.isArray(goalOrPrompt)) {
    // Must be checked before the generic object case: arrays are objects too
    return buildPromptFromGoals(goalOrPrompt, ctx);
  }

  if (goalOrPrompt && typeof goalOrPrompt === 'object') {
    // Single goal object (the truthiness check excludes null)
    return buildPromptFromGoal(goalOrPrompt, ctx);
  }

  // Fallback to default gameplay prompt
  warn('[Game Goal] Invalid goal/prompt format, using default');
  return buildDefaultGameplayPrompt(ctx);
}
68
+
/**
 * Build a prompt from a plain string, interpolating context values.
 *
 * Placeholders of the form `${gameState.key}` and `${previousState.key}`
 * (key matching `\w+`) are replaced by the stringified value of that key.
 * A placeholder is left untouched when the state object is missing, the value
 * is `undefined`, or the key only resolves to a member inherited from
 * `Object.prototype` (e.g. `constructor`, `toString`).
 *
 * After interpolation, a "CURRENT GAME STATE" section is appended when the
 * game state is non-empty and the prompt does not already contain that
 * heading, and a persona section is appended when a persona is given and the
 * prompt does not already contain the text "PERSONA".
 *
 * @param {string} promptString - Prompt text, optionally containing placeholders
 * @param {Object} context - Game context
 * @param {Object} [context.gameState] - Current game state
 * @param {Object} [context.previousState] - Previous game state
 * @param {Object} [context.persona] - Persona (`name`, optional `perspective`)
 * @returns {string} Prompt with placeholders resolved and context appended
 */
function buildPromptFromString(promptString, context) {
  const { gameState = {}, previousState = null, persona = null } = context;

  // Resolve one placeholder. Members that exist only because every object
  // inherits them from Object.prototype are not game data; interpolating them
  // would leak text such as "function Object() { [native code] }" into the
  // prompt. Properties inherited from any other prototype (e.g. class
  // getters) are still honoured.
  const resolve = (source, key, placeholder) => {
    if (source == null) return placeholder;
    const isOwn = Object.prototype.hasOwnProperty.call(source, key);
    if (!isOwn && key in Object.prototype) return placeholder;
    const value = source[key];
    return value !== undefined ? String(value) : placeholder;
  };

  // Replace ${gameState.*} patterns
  let prompt = promptString.replace(
    /\$\{gameState\.(\w+)\}/g,
    (match, key) => resolve(gameState, key, match)
  );

  // Replace ${previousState.*} patterns
  if (previousState) {
    prompt = prompt.replace(
      /\$\{previousState\.(\w+)\}/g,
      (match, key) => resolve(previousState, key, match)
    );
  }

  // Add context if not already present
  if (!prompt.includes('CURRENT GAME STATE') && gameState && Object.keys(gameState).length > 0) {
    prompt += `\n\nCURRENT GAME STATE:\n${formatGameState(gameState)}`;
  }

  // Add persona context if provided
  if (persona && !prompt.includes('PERSONA')) {
    prompt += `\n\nPERSONA PERSPECTIVE: ${persona.name}\n${persona.perspective || ''}`;
  }

  return prompt;
}
102
+
/**
 * Build a prompt from a single structured goal object.
 *
 * Sections are emitted in a fixed order and only when their data is present:
 * goal description, numbered evaluation criteria, focus areas, numbered
 * questions, expected score range, current game state, previous state plus a
 * change summary, and persona details. Sections are joined with newlines.
 *
 * @param {Object} goal - Goal specification
 * @param {string} [goal.description] - Goal text
 * @param {Array} [goal.criteria=[]] - Evaluation criteria
 * @param {Array} [goal.focus=[]] - Focus areas
 * @param {Array} [goal.questions=[]] - Questions to answer
 * @param {?number} [goal.minScore=null] - Lower bound of the expected score
 * @param {?number} [goal.maxScore=null] - Upper bound of the expected score
 * @param {Object} context - Game context (`gameState`, `previousState`, `persona`)
 * @returns {string} Generated prompt
 */
function buildPromptFromGoal(goal, context) {
  const {
    description,
    criteria = [],
    focus = [],
    questions = [],
    minScore = null,
    maxScore = null
  } = goal;
  const { gameState = {}, previousState = null, persona = null } = context;

  // Renders "1. first\n2. second\n..." for the numbered sections.
  const numbered = (items) => items.map((item, index) => `${index + 1}. ${item}`).join('\n');

  const sections = [];

  if (description) {
    sections.push(`GOAL: ${description}`);
  }
  if (criteria.length > 0) {
    sections.push(`\nEVALUATION CRITERIA:\n${numbered(criteria)}`);
  }
  if (focus.length > 0) {
    sections.push(`\nFOCUS AREAS: ${focus.join(', ')}`);
  }
  if (questions.length > 0) {
    sections.push(`\nQUESTIONS TO ANSWER:\n${numbered(questions)}`);
  }

  // Score expectations: closed range when both bounds exist, otherwise a
  // one-sided bound.
  const hasMin = minScore !== null;
  const hasMax = maxScore !== null;
  if (hasMin || hasMax) {
    let range;
    if (hasMin && hasMax) {
      range = `${minScore}-${maxScore}`;
    } else if (hasMin) {
      range = `≥${minScore}`;
    } else {
      range = `≤${maxScore}`;
    }
    sections.push(`\nEXPECTED SCORE RANGE: ${range}/10`);
  }

  // Game state context (skipped for an empty state object)
  if (gameState && Object.keys(gameState).length > 0) {
    sections.push(`\n\nCURRENT GAME STATE:\n${formatGameState(gameState)}`);
  }

  // Previous state, followed by a summary of what changed
  if (previousState) {
    sections.push(`\n\nPREVIOUS STATE:\n${formatGameState(previousState)}`);
    sections.push(`\nCHANGES: ${formatStateChanges(previousState, gameState)}`);
  }

  // Persona context
  if (persona) {
    sections.push(`\n\nPERSONA PERSPECTIVE: ${persona.name}`);
    if (persona.perspective) {
      sections.push(persona.perspective);
    }
    if (persona.goals) {
      sections.push(`GOALS: ${persona.goals.join(', ')}`);
    }
  }

  return sections.join('\n');
}
170
+
/**
 * Build a prompt from an array of goals.
 *
 * An empty array yields the default gameplay prompt and a single-element
 * array is delegated to generateGamePrompt for that element. With two or more
 * goals, a numbered list is produced: string goals are listed verbatim,
 * object goals contribute their `description` (plus a criteria line when they
 * have criteria). Entries of any other shape, including objects without a
 * description, add nothing to the list. Game state, previous state and
 * persona name follow, then a closing instruction.
 *
 * @param {Array} goals - Goals to evaluate
 * @param {Object} context - Game context (`gameState`, `previousState`, `persona`)
 * @returns {string} Generated prompt
 */
function buildPromptFromGoals(goals, context) {
  switch (goals.length) {
    case 0:
      return buildDefaultGameplayPrompt(context);
    case 1:
      return generateGamePrompt(goals[0], context);
    default:
      break;
  }

  // Multiple goals - evaluate all
  const sections = ['EVALUATE THE FOLLOWING GOALS:'];

  for (const [index, goal] of goals.entries()) {
    const position = index + 1;

    if (typeof goal === 'string') {
      sections.push(`\n${position}. ${goal}`);
      continue;
    }

    const isDescribedGoal = goal && typeof goal === 'object' && goal.description;
    if (!isDescribedGoal) {
      continue;
    }

    sections.push(`\n${position}. ${goal.description}`);
    if (goal.criteria && goal.criteria.length > 0) {
      sections.push(` Criteria: ${goal.criteria.join(', ')}`);
    }
  }

  // Shared context appended after the goal list
  const { gameState = {}, previousState = null, persona = null } = context;

  const hasGameState = gameState && Object.keys(gameState).length > 0;
  if (hasGameState) {
    sections.push(`\n\nCURRENT GAME STATE:\n${formatGameState(gameState)}`);
  }

  if (previousState) {
    sections.push(`\n\nPREVIOUS STATE:\n${formatGameState(previousState)}`);
  }

  if (persona) {
    sections.push(`\n\nPERSONA: ${persona.name}`);
  }

  sections.push(`\n\nEvaluate how well the game state meets each goal. Provide a score (0-10) and specific feedback for each goal.`);

  return sections.join('\n');
}
216
+
/**
 * Build the generic gameplay prompt used when no usable goal is supplied.
 *
 * Starts from a fixed five-question checklist and appends, when available:
 * the current game state, the previous state with a change summary, and the
 * persona name with its perspective.
 *
 * @param {Object} context - Game context (`gameState`, `previousState`, `persona`)
 * @returns {string} Generated prompt
 */
function buildDefaultGameplayPrompt(context) {
  const { gameState = {}, previousState = null, persona = null } = context;

  const sections = [
    'Evaluate the gameplay experience.',
    'Consider:',
    '1. Is the game fun and engaging?',
    '2. Are controls responsive?',
    '3. Is there clear feedback for actions?',
    '4. Is the difficulty appropriate?',
    '5. Would you want to play again?'
  ];

  const hasGameState = gameState && Object.keys(gameState).length > 0;
  if (hasGameState) {
    sections.push(`\n\nCURRENT GAME STATE:\n${formatGameState(gameState)}`);
  }

  if (previousState) {
    sections.push(
      `\n\nPREVIOUS STATE:\n${formatGameState(previousState)}`,
      `\nCHANGES: ${formatStateChanges(previousState, gameState)}`
    );
  }

  if (persona) {
    sections.push(`\n\nPERSONA: ${persona.name}`);
    if (persona.perspective) {
      sections.push(persona.perspective);
    }
  }

  return sections.join('\n');
}
249
+
/**
 * Render a game state object as a bulleted, one-field-per-line summary.
 *
 * Well-known fields (gameActive, score, level, lives, bricks, ball, paddle)
 * come first, in that order, each with its own label; `bricks` is shown as a
 * count (array length or the raw value) and `ball`/`paddle` as
 * visible / not visible based on truthiness. All remaining keys follow in the
 * object's own key order, skipping null/undefined values and JSON-encoding
 * object values.
 *
 * @param {Object} gameState - Game state to describe
 * @returns {string} Formatted lines, or 'No game state available' for a
 *   missing or empty state
 */
function formatGameState(gameState) {
  if (!gameState || Object.keys(gameState).length === 0) {
    return 'No game state available';
  }

  const visibility = (value) => (value ? 'visible' : 'not visible');

  // [key, renderer] pairs for the well-known fields, in output order.
  const knownFields = [
    ['gameActive', (value) => `- Active: ${value}`],
    ['score', (value) => `- Score: ${value}`],
    ['level', (value) => `- Level: ${value}`],
    ['lives', (value) => `- Lives: ${value}`],
    ['bricks', (value) => `- Bricks: ${Array.isArray(value) ? value.length : value} remaining`],
    ['ball', (value) => `- Ball: ${visibility(value)}`],
    ['paddle', (value) => `- Paddle: ${visibility(value)}`]
  ];

  const output = [];

  for (const [key, render] of knownFields) {
    const value = gameState[key];
    // Only `undefined` counts as absent here; null is still rendered.
    if (value !== undefined) {
      output.push(render(value));
    }
  }

  // Any other fields
  const knownKeys = knownFields.map(([key]) => key);
  for (const key of Object.keys(gameState)) {
    if (knownKeys.includes(key)) {
      continue;
    }
    const value = gameState[key];
    if (value === undefined || value === null) {
      continue;
    }
    const rendered = typeof value === 'object' ? JSON.stringify(value) : value;
    output.push(`- ${key}: ${rendered}`);
  }

  return output.join('\n');
}
299
+
/**
 * Summarize the differences between two game states as a single line.
 *
 * Compares score, level, lives, bricks and gameActive, in that order. A field
 * is only considered when it is defined in both states, and only reported
 * when its value (for bricks: the count) differs.
 *
 * @param {Object} previousState - Earlier game state
 * @param {Object} currentState - Later game state
 * @returns {string} Comma-separated change list, 'No significant changes'
 *   when nothing differs, or 'No previous state' when either state is missing
 */
function formatStateChanges(previousState, currentState) {
  if (!previousState || !currentState) {
    return 'No previous state';
  }

  const inBoth = (key) => previousState[key] !== undefined && currentState[key] !== undefined;
  const brickCount = (bricks) => (Array.isArray(bricks) ? bricks.length : bricks);
  const activity = (isActive) => (isActive ? 'active' : 'paused');

  const summary = [];

  // Score change, with a signed delta
  if (inBoth('score')) {
    const delta = currentState.score - previousState.score;
    if (delta !== 0) {
      const sign = delta > 0 ? '+' : '';
      summary.push(`Score: ${previousState.score} → ${currentState.score} (${sign}${delta})`);
    }
  }

  // Level and lives are reported the same way: plain before → after
  for (const [key, label] of [['level', 'Level'], ['lives', 'Lives']]) {
    if (inBoth(key) && previousState[key] !== currentState[key]) {
      summary.push(`${label}: ${previousState[key]} → ${currentState[key]}`);
    }
  }

  // Bricks change, compared by count
  if (inBoth('bricks')) {
    const before = brickCount(previousState.bricks);
    const after = brickCount(currentState.bricks);
    if (before !== after) {
      const destroyed = before - after;
      const note = destroyed > 0 ? `${destroyed} destroyed` : 'added';
      summary.push(`Bricks: ${before} → ${after} (${note})`);
    }
  }

  // Game active change
  if (inBoth('gameActive') && previousState.gameActive !== currentState.gameActive) {
    summary.push(`Game: ${activity(previousState.gameActive)} → ${activity(currentState.gameActive)}`);
  }

  if (summary.length === 0) {
    return 'No significant changes';
  }
  return summary.join(', ');
}
349
+
/**
 * Create a goal object from one of the built-in game evaluation templates.
 *
 * The template supplies `description`, `criteria`, `focus` and `questions`.
 * Any property in `options` is copied onto the result and overrides the
 * template, except that a falsy `criteria`, `focus` or `questions` option
 * falls back to the template's list. An unknown `goalType` logs a warning and
 * uses the 'fun' template instead.
 *
 * The templates are rebuilt on every call, so the returned arrays are never
 * shared between results.
 *
 * @param {string} goalType - Type of goal ('fun', 'accessibility', 'performance', 'balance', 'visuals', 'controls')
 * @param {Object} [options={}] - Goal options (null is treated like an empty object)
 * @returns {Object} Goal object
 */
export function createGameGoal(goalType, options = {}) {
  // A default parameter only covers `undefined`; guard an explicit null too.
  const opts = options == null ? {} : options;

  const goalTemplates = {
    fun: {
      description: 'Evaluate if the game is fun and engaging',
      criteria: [
        'Is the game enjoyable to play?',
        'Does it provide a sense of achievement?',
        'Is there variety in gameplay?',
        'Would players want to replay?'
      ],
      focus: ['engagement', 'enjoyment', 'replayability'],
      questions: [
        'What makes this game fun?',
        'What would make it more engaging?',
        'Is there enough variety?'
      ]
    },
    accessibility: {
      description: 'Evaluate game accessibility',
      criteria: [
        'Can the game be played with keyboard only?',
        'Are visual indicators clear?',
        'Is there audio feedback?',
        'Are controls customizable?'
      ],
      focus: ['keyboard-navigation', 'visual-indicators', 'audio-feedback', 'customization'],
      questions: [
        'Can someone with motor disabilities play this?',
        'Can someone with visual impairments play this?',
        'Are controls accessible?'
      ]
    },
    performance: {
      description: 'Evaluate game performance',
      criteria: [
        'Is the frame rate smooth (60 FPS)?',
        'Are there lag spikes?',
        'Is the game responsive?',
        'Are there performance issues?'
      ],
      focus: ['frame-rate', 'responsiveness', 'lag', 'optimization'],
      questions: [
        'Is the game running smoothly?',
        'Are there any performance bottlenecks?',
        'Is input lag acceptable?'
      ]
    },
    balance: {
      description: 'Evaluate game balance',
      criteria: [
        'Is difficulty appropriate?',
        'Is progression clear?',
        'Is the game too easy or too hard?',
        'Is there a good learning curve?'
      ],
      focus: ['difficulty', 'progression', 'learning-curve', 'challenge'],
      questions: [
        'Is the game balanced?',
        'Is difficulty appropriate for target audience?',
        'Is progression satisfying?'
      ]
    },
    visuals: {
      description: 'Evaluate visual design',
      criteria: [
        'Are colors vibrant and clear?',
        'Is the layout clear?',
        'Are animations smooth?',
        'Is visual feedback clear?'
      ],
      focus: ['colors', 'layout', 'animations', 'visual-feedback'],
      questions: [
        'Is the visual design appealing?',
        'Are visual elements clear?',
        'Do animations enhance the experience?'
      ]
    },
    controls: {
      description: 'Evaluate game controls',
      criteria: [
        'Are controls responsive?',
        'Are controls intuitive?',
        'Is there clear feedback for actions?',
        'Are controls customizable?'
      ],
      focus: ['responsiveness', 'intuitiveness', 'feedback', 'customization'],
      questions: [
        'Are controls easy to learn?',
        'Is input lag acceptable?',
        'Do controls feel good?'
      ]
    }
  };

  // Only accept the templates declared above. A plain `goalTemplates[goalType]`
  // would also resolve inherited members such as 'constructor' or 'toString',
  // bypass the unknown-type fallback and produce a goal with undefined fields.
  const isKnownType = Object.prototype.hasOwnProperty.call(goalTemplates, goalType);
  if (!isKnownType) {
    warn(`[Game Goal] Unknown goal type: ${goalType}, using 'fun' as default`);
    return createGameGoal('fun', opts);
  }
  const template = goalTemplates[goalType];

  // Merge with custom options
  return {
    ...template,
    ...opts,
    // Listed after the spreads so a falsy option (e.g. null) cannot wipe out
    // the template's list
    criteria: opts.criteria || template.criteria,
    focus: opts.focus || template.focus,
    questions: opts.questions || template.questions
  };
}
467
+
/**
 * Create one goal object per requested goal type.
 *
 * Every type is passed to createGameGoal together with the same `options`
 * object, and the results are returned in the order of `goalTypes`.
 *
 * @param {string[]} goalTypes - Array of goal types
 * @param {Object} [options={}] - Options applied to every goal
 * @returns {Object[]} Array of goal objects
 */
export function createGameGoals(goalTypes, options = {}) {
  const toGoal = (goalType) => createGameGoal(goalType, options);
  return goalTypes.map(toGoal);
}
478
+