@arclabs561/ai-visual-test 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/.secretsignore.example +20 -0
  2. package/CHANGELOG.md +360 -0
  3. package/CONTRIBUTING.md +63 -0
  4. package/DEPLOYMENT.md +80 -0
  5. package/LICENSE +22 -0
  6. package/README.md +142 -0
  7. package/SECURITY.md +108 -0
  8. package/api/health.js +34 -0
  9. package/api/validate.js +252 -0
  10. package/index.d.ts +1221 -0
  11. package/package.json +112 -0
  12. package/public/index.html +149 -0
  13. package/src/batch-optimizer.mjs +451 -0
  14. package/src/bias-detector.mjs +370 -0
  15. package/src/bias-mitigation.mjs +233 -0
  16. package/src/cache.mjs +433 -0
  17. package/src/config.mjs +268 -0
  18. package/src/constants.mjs +80 -0
  19. package/src/context-compressor.mjs +350 -0
  20. package/src/convenience.mjs +617 -0
  21. package/src/cost-tracker.mjs +257 -0
  22. package/src/cross-modal-consistency.mjs +170 -0
  23. package/src/data-extractor.mjs +232 -0
  24. package/src/dynamic-few-shot.mjs +140 -0
  25. package/src/dynamic-prompts.mjs +361 -0
  26. package/src/ensemble/index.mjs +53 -0
  27. package/src/ensemble-judge.mjs +366 -0
  28. package/src/error-handler.mjs +67 -0
  29. package/src/errors.mjs +167 -0
  30. package/src/experience-propagation.mjs +128 -0
  31. package/src/experience-tracer.mjs +487 -0
  32. package/src/explanation-manager.mjs +299 -0
  33. package/src/feedback-aggregator.mjs +248 -0
  34. package/src/game-goal-prompts.mjs +478 -0
  35. package/src/game-player.mjs +548 -0
  36. package/src/hallucination-detector.mjs +155 -0
  37. package/src/helpers/playwright.mjs +80 -0
  38. package/src/human-validation-manager.mjs +516 -0
  39. package/src/index.mjs +364 -0
  40. package/src/judge.mjs +929 -0
  41. package/src/latency-aware-batch-optimizer.mjs +192 -0
  42. package/src/load-env.mjs +159 -0
  43. package/src/logger.mjs +55 -0
  44. package/src/metrics.mjs +187 -0
  45. package/src/model-tier-selector.mjs +221 -0
  46. package/src/multi-modal/index.mjs +36 -0
  47. package/src/multi-modal-fusion.mjs +190 -0
  48. package/src/multi-modal.mjs +524 -0
  49. package/src/natural-language-specs.mjs +1071 -0
  50. package/src/pair-comparison.mjs +277 -0
  51. package/src/persona/index.mjs +42 -0
  52. package/src/persona-enhanced.mjs +200 -0
  53. package/src/persona-experience.mjs +572 -0
  54. package/src/position-counterbalance.mjs +140 -0
  55. package/src/prompt-composer.mjs +375 -0
  56. package/src/render-change-detector.mjs +583 -0
  57. package/src/research-enhanced-validation.mjs +436 -0
  58. package/src/retry.mjs +152 -0
  59. package/src/rubrics.mjs +231 -0
  60. package/src/score-tracker.mjs +277 -0
  61. package/src/smart-validator.mjs +447 -0
  62. package/src/spec-config.mjs +106 -0
  63. package/src/spec-templates.mjs +347 -0
  64. package/src/specs/index.mjs +38 -0
  65. package/src/temporal/index.mjs +102 -0
  66. package/src/temporal-adaptive.mjs +163 -0
  67. package/src/temporal-batch-optimizer.mjs +222 -0
  68. package/src/temporal-constants.mjs +69 -0
  69. package/src/temporal-context.mjs +49 -0
  70. package/src/temporal-decision-manager.mjs +271 -0
  71. package/src/temporal-decision.mjs +669 -0
  72. package/src/temporal-errors.mjs +58 -0
  73. package/src/temporal-note-pruner.mjs +173 -0
  74. package/src/temporal-preprocessor.mjs +543 -0
  75. package/src/temporal-prompt-formatter.mjs +219 -0
  76. package/src/temporal-validation.mjs +159 -0
  77. package/src/temporal.mjs +415 -0
  78. package/src/type-guards.mjs +311 -0
  79. package/src/uncertainty-reducer.mjs +470 -0
  80. package/src/utils/index.mjs +175 -0
  81. package/src/validation-framework.mjs +321 -0
  82. package/src/validation-result-normalizer.mjs +64 -0
  83. package/src/validation.mjs +243 -0
  84. package/src/validators/accessibility-programmatic.mjs +345 -0
  85. package/src/validators/accessibility-validator.mjs +223 -0
  86. package/src/validators/batch-validator.mjs +143 -0
  87. package/src/validators/hybrid-validator.mjs +268 -0
  88. package/src/validators/index.mjs +34 -0
  89. package/src/validators/prompt-builder.mjs +218 -0
  90. package/src/validators/rubric.mjs +85 -0
  91. package/src/validators/state-programmatic.mjs +260 -0
  92. package/src/validators/state-validator.mjs +291 -0
  93. package/vercel.json +27 -0
@@ -0,0 +1,375 @@
1
+ /**
2
+ * Unified Prompt Composition System
3
+ *
4
+ * Research-backed prompt composition that systematically combines:
5
+ * - Rubrics (10-20% reliability improvement, arXiv:2412.05579)
6
+ * - Temporal context (coherence checking)
7
+ * - Persona perspectives (consistent formatting)
8
+ * - Comparison instructions (structured format for pair comparison)
9
+ * - Context information (testType, viewport, gameState)
10
+ *
11
+ * This replaces ad-hoc prompt building across modules.
12
+ */
13
+
14
+ import { buildRubricPrompt, DEFAULT_RUBRIC } from './rubrics.mjs';
15
+ import { formatNotesForPrompt, aggregateTemporalNotes } from './temporal.mjs';
16
+ import { formatTemporalContext } from './temporal-prompt-formatter.mjs';
17
+ import { selectTopWeightedNotes } from './temporal-note-pruner.mjs';
18
+ import { warn } from './logger.mjs';
19
+
20
// Lazily-loaded reference to the `generateGamePrompt` export of
// './game-goal-prompts.mjs'. Stays null until a load succeeds.
let generateGamePrompt = null;

/**
 * Resolve the goal-based prompt generator on demand.
 *
 * The module is imported dynamically on first use and the resolved function
 * is cached in `generateGamePrompt` for later calls.
 *
 * @returns {Promise<Function | null>} The `generateGamePrompt` export, or
 *   null when the module cannot be imported or does not export a function.
 */
async function getGenerateGamePrompt() {
  if (generateGamePrompt) {
    return generateGamePrompt;
  }

  try {
    const module = await import('./game-goal-prompts.mjs');
    // Only cache a callable export; otherwise callers would receive
    // `undefined` (or a non-function) instead of the null "unavailable" signal.
    if (typeof module.generateGamePrompt === 'function') {
      generateGamePrompt = module.generateGamePrompt;
    }
  } catch {
    // Best-effort: the import failed, so report the generator as unavailable.
    return null;
  }

  return generateGamePrompt;
}
33
+
34
/**
 * Compose a complete evaluation prompt with all relevant components.
 *
 * Sections are appended in a fixed order: rubric, base prompt (or a prompt
 * generated from `goal`), temporal context, persona perspective, multi-modal
 * context, comparison instructions, and context information. A section that
 * turns out to be empty is skipped together with its separator.
 *
 * @param {string} basePrompt - Base evaluation prompt
 * @param {{
 *   rubric?: import('./index.mjs').Rubric;
 *   includeRubric?: boolean;
 *   temporalNotes?: import('./index.mjs').AggregatedTemporalNotes | unknown[] | null;
 *   persona?: import('./index.mjs').Persona | null;
 *   renderedCode?: import('./index.mjs').RenderedCode | null;
 *   gameState?: Record<string, unknown> | null;
 *   isMultiImage?: boolean;
 *   isComparison?: boolean;
 *   context?: import('./index.mjs').ValidationContext;
 *   goal?: unknown;
 * }} [options={}] - Composition options. `temporalNotes` may be a raw notes
 *   array (pruned and aggregated here) or an already aggregated object.
 *   `goal`, when set, is handed to `generateGamePrompt` to replace the base
 *   prompt.
 * @returns {Promise<string>} Composed prompt
 */
export async function composePrompt(basePrompt, options = {}) {
  const {
    rubric = DEFAULT_RUBRIC,
    includeRubric = true, // Default true (research: 10-20% improvement)
    temporalNotes = null,
    persona = null,
    renderedCode = null,
    gameState = null,
    isMultiImage = false,
    isComparison = false,
    goal = null // Support variable goals for cohesive integration
  } = options ?? {};
  // Defaults only apply to `undefined`; an explicit `context: null` must not crash.
  const context = options?.context ?? {};

  const parts = [];

  // 1. Rubric (research: explicit rubrics improve reliability by 10-20%)
  if (includeRubric) {
    parts.push(buildRubricPrompt(rubric, true));
  }

  // 2. Base prompt (or generate from goal if provided)
  let finalBasePrompt = basePrompt;
  if (goal) {
    try {
      const generateGamePromptFn = await getGenerateGamePrompt();
      if (generateGamePromptFn) {
        const generatedPrompt = generateGamePromptFn(goal, {
          gameState: gameState || context.gameState || {},
          previousState: context.previousState || null,
          renderedCode: renderedCode || context.renderedCode || null,
          persona: persona || (context.persona ? {
            name: context.persona,
            perspective: context.perspective,
            focus: context.focus || []
          } : null),
          stage: context.stage || context.testType || 'gameplay'
        });
        // Array#join renders undefined/null as '', so an empty result would
        // silently drop the caller's base prompt. Keep the base prompt then.
        if (typeof generatedPrompt === 'string' && generatedPrompt.length > 0) {
          finalBasePrompt = generatedPrompt;
        }
      }
    } catch (error) {
      // Fallback to base prompt if goal generation fails
      if (context.debug?.verbose) {
        warn(`[Prompt Composer] Goal prompt generation failed: ${error.message}`);
      }
    }
  }
  parts.push(finalBasePrompt);

  // 3. Temporal context (if available)
  if (temporalNotes) {
    let processedTemporalNotes = temporalNotes;
    if (Array.isArray(temporalNotes)) {
      // Raw notes array: keep only the top-weighted notes, then aggregate them.
      try {
        const prunedNotes = selectTopWeightedNotes(temporalNotes, {
          topN: context.maxTemporalNotes || 10 // Default: top 10 notes
        });
        processedTemporalNotes = aggregateTemporalNotes(prunedNotes);
      } catch (error) {
        // If pruning/aggregation fails, skip temporal context
        if (context.debug?.verbose) {
          warn(`[Prompt Composer] Failed to prune/aggregate temporal notes: ${error.message}`);
        }
        processedTemporalNotes = null;
      }
    }

    if (processedTemporalNotes) {
      const temporalContext = formatTemporalContext(processedTemporalNotes, {
        includeMultiScale: true,
        naturalLanguage: true // Use natural language for better VLM understanding
      });
      if (temporalContext) {
        parts.push(`\n\n${temporalContext}`);
      }
    }
  }

  // 4. Persona perspective (if provided)
  if (persona) {
    parts.push(`\n\n${buildPersonaContext(persona, renderedCode, gameState)}`);
  }

  // 5. Multi-modal context (rendered code, game state)
  if (renderedCode || gameState) {
    const multiModalContext = buildMultiModalContext(renderedCode, gameState);
    // The builder returns '' when nothing is usable (e.g. gameState = {}).
    if (multiModalContext) {
      parts.push(`\n\n${multiModalContext}`);
    }
  }

  // 6. Comparison instructions (if multi-image or comparison)
  if (isMultiImage || isComparison) {
    const comparisonInstructions = buildComparisonInstructions(isComparison);
    // Multi-image without pair comparison yields no instructions; do not
    // append a dangling separator.
    if (comparisonInstructions) {
      parts.push(`\n\n${comparisonInstructions}`);
    }
  }

  // 7. Context information (testType, viewport, etc.)
  const contextPart = buildContextSection(context);
  if (contextPart) {
    parts.push(`\n\n${contextPart}`);
  }

  return parts.join('');
}
159
+
160
/**
 * Build persona context section.
 *
 * Emits the persona name, its optional perspective text, then either the
 * focus areas or (when no focus areas are present) the goals, followed by
 * the concerns. Empty lists are treated as absent so no blank
 * "FOCUS AREAS: " / "CONCERNS: " lines are produced and an empty `focus`
 * array does not hide `goals`.
 *
 * @param {import('./index.mjs').Persona} persona - Persona configuration
 * @param {import('./index.mjs').RenderedCode | null} renderedCode - Rendered code (accepted for signature compatibility; not used here)
 * @param {Record<string, unknown> | null} gameState - Game state (accepted for signature compatibility; not used here)
 * @returns {string} Persona context section
 */
function buildPersonaContext(persona, renderedCode = null, gameState = null) {
  const isNonEmptyList = (value) => Array.isArray(value) && value.length > 0;

  const parts = [`PERSONA PERSPECTIVE: ${persona.name}`];

  if (persona.perspective) {
    parts.push(persona.perspective);
  }

  // Focus areas take precedence over goals; goals are the fallback.
  if (isNonEmptyList(persona.focus)) {
    parts.push(`FOCUS AREAS: ${persona.focus.join(', ')}`);
  } else if (isNonEmptyList(persona.goals)) {
    parts.push(`GOALS: ${persona.goals.join(', ')}`);
  }

  if (isNonEmptyList(persona.concerns)) {
    parts.push(`CONCERNS: ${persona.concerns.join(', ')}`);
  }

  // Leading newline leaves a blank line before the closing instruction.
  parts.push('\nEvaluate from this persona\'s perspective.');

  return parts.join('\n');
}
191
+
192
/**
 * Build multi-modal context section.
 *
 * Combines the available rendered-code details (DOM structure, critical CSS,
 * and at most the first 5000 characters of the HTML) with the game state,
 * then appends a fixed checklist of things to consider. Returns an empty
 * string when neither input contributes any content, so callers can skip the
 * section entirely.
 *
 * @param {import('./index.mjs').RenderedCode | null} renderedCode - Rendered code
 * @param {Record<string, unknown> | null} gameState - Game state
 * @returns {string} Multi-modal context section, or '' when there is nothing to show
 */
function buildMultiModalContext(renderedCode = null, gameState = null) {
  const sections = [];

  if (renderedCode) {
    const codeLines = [];
    if (renderedCode.domStructure) {
      codeLines.push(`DOM Structure: ${JSON.stringify(renderedCode.domStructure, null, 2)}`);
    }
    if (renderedCode.criticalCSS) {
      codeLines.push(`Critical CSS: ${JSON.stringify(renderedCode.criticalCSS, null, 2)}`);
    }
    // Only strings can be truncated; anything else would throw on substring.
    if (typeof renderedCode.html === 'string' && renderedCode.html.length > 0) {
      codeLines.push(`HTML (first 5000 chars): ${renderedCode.html.substring(0, 5000)}`);
    }
    // Emit the header only when there is something underneath it.
    if (codeLines.length > 0) {
      sections.push('RENDERED CODE ANALYSIS:', ...codeLines);
    }
  }

  if (gameState && Object.keys(gameState).length > 0) {
    sections.push(`GAME STATE: ${JSON.stringify(gameState, null, 2)}`);
  }

  if (sections.length === 0) {
    return '';
  }

  return [
    'MULTI-MODAL CONTEXT:',
    ...sections,
    '\nConsider:',
    '1. Visual appearance (from screenshot)',
    '2. Code correctness (from rendered code)',
    '3. State consistency (does visual match code and state?)',
    '4. Principles alignment (does it match design principles?)'
  ].join('\n');
}
230
+
231
/**
 * Produce the instruction block used for pair comparison prompts.
 *
 * The block asks for a JSON verdict (winner, confidence, reasoning,
 * differences, per-image scores) and lists what to focus on.
 *
 * @param {boolean} isComparison - Whether this is a pair comparison
 * @returns {string} Comparison instructions, or '' when `isComparison` is falsy
 */
function buildComparisonInstructions(isComparison) {
  return isComparison
    ? `COMPARISON INSTRUCTIONS:
You are comparing two screenshots side-by-side. Return JSON with:
{
"winner": "A" | "B" | "tie",
"confidence": 0.0-1.0,
"reasoning": "explanation",
"differences": ["difference1", "difference2"],
"scores": {"A": 0-10, "B": 0-10}
}

Focus on:
- Which screenshot better meets the criteria?
- What are the key differences?
- Which has fewer issues?
- Which provides better user experience?

Be specific about what makes one better than the other.`
    : '';
}
260
+
261
/**
 * Build context section.
 *
 * Lists the test type, the viewport size, and the game state (unless
 * `context.gameStateAlreadyIncluded` says it was rendered elsewhere).
 * Missing pieces are skipped; a viewport without both dimensions is skipped
 * rather than printed as "undefinedxundefined".
 *
 * @param {import('./index.mjs').ValidationContext | null | undefined} context - Validation context
 * @returns {string} Context section, or '' when there is nothing to report
 */
function buildContextSection(context) {
  // Tolerate a missing context instead of throwing on property access.
  if (!context) {
    return '';
  }

  const { testType, viewport, gameState, gameStateAlreadyIncluded } = context;
  const lines = [];

  if (testType) {
    lines.push(`Test Type: ${testType}`);
  }

  if (viewport && viewport.width != null && viewport.height != null) {
    lines.push(`Viewport: ${viewport.width}x${viewport.height}`);
  }

  if (gameState && !gameStateAlreadyIncluded) {
    lines.push(`Game State: ${JSON.stringify(gameState)}`);
  }

  return lines.length === 0 ? '' : `CONTEXT:\n${lines.join('\n')}`;
}
288
+
289
/**
 * Compose prompt for single image evaluation.
 *
 * Derives persona, rendered code, game state and goal from `context` and
 * delegates to `composePrompt`. When the game state is non-empty it is
 * rendered by the multi-modal section, so the context handed on is flagged
 * with `gameStateAlreadyIncluded` to keep the context section from printing
 * the same state a second time.
 *
 * @param {string} basePrompt - Base prompt
 * @param {import('./index.mjs').ValidationContext} context - Validation context
 * @param {{
 *   includeRubric?: boolean;
 *   temporalNotes?: import('./index.mjs').AggregatedTemporalNotes | null;
 * }} [options={}] - Additional options
 * @returns {Promise<string>} Composed prompt
 */
export async function composeSingleImagePrompt(basePrompt, context = {}, options = {}) {
  // Defaults only cover `undefined`; guard explicit nulls as well.
  const ctx = context ?? {};
  const opts = options ?? {};

  const gameState = ctx.gameState || null;
  // Same emptiness test the multi-modal section applies before printing state.
  const gameStateRendered = Boolean(gameState) && Object.keys(gameState).length > 0;

  return await composePrompt(basePrompt, {
    includeRubric: opts.includeRubric !== false,
    temporalNotes: opts.temporalNotes || null,
    persona: ctx.persona ? {
      name: ctx.persona,
      perspective: ctx.perspective,
      focus: ctx.focus || []
    } : null,
    renderedCode: ctx.renderedCode || null,
    gameState,
    isMultiImage: false,
    isComparison: false,
    goal: ctx.goal || null, // Support variable goals
    context: gameStateRendered
      ? { ...ctx, gameStateAlreadyIncluded: true } // Prevent duplicate gameState
      : ctx
  });
}
317
+
318
/**
 * Compose the prompt used when two screenshots are compared against each other.
 *
 * Delegates to `composePrompt` with comparison mode switched on and with
 * temporal notes, persona, rendered code and game state all disabled.
 *
 * @param {string} basePrompt - Base comparison prompt
 * @param {import('./index.mjs').ValidationContext} context - Validation context
 * @param {{
 *   includeRubric?: boolean;
 * }} [options={}] - Additional options
 * @returns {Promise<string>} Composed comparison prompt
 */
export async function composeComparisonPrompt(basePrompt, context = {}, options = {}) {
  // The rubric stays on unless the caller explicitly passes `false`.
  const wantsRubric = options.includeRubric !== false;
  // Variable goals are supported here too, though less common for comparisons.
  const comparisonGoal = context.goal || null;

  const comparisonOptions = {
    includeRubric: wantsRubric,
    temporalNotes: null, // no temporal notes in pair comparison
    persona: null, // comparison is meant to be objective
    renderedCode: null, // comparison is visual-only
    gameState: null,
    isMultiImage: true,
    isComparison: true,
    goal: comparisonGoal,
    context
  };

  const composed = await composePrompt(basePrompt, comparisonOptions);
  return composed;
}
341
+
342
/**
 * Compose the prompt used for multi-modal validation.
 *
 * Delegates to `composePrompt`, taking rendered code and game state from
 * `context`. An explicit `options.persona` wins over a persona described by
 * the context fields; `context.goal` wins over `options.goal`. The context
 * passed on is a copy flagged with `gameStateAlreadyIncluded` so the game
 * state is not repeated in the context section.
 *
 * @param {string} basePrompt - Base prompt
 * @param {import('./index.mjs').ValidationContext} context - Validation context
 * @param {{
 *   includeRubric?: boolean;
 *   temporalNotes?: import('./index.mjs').AggregatedTemporalNotes | null;
 *   persona?: import('./index.mjs').Persona | null;
 * }} [options={}] - Additional options
 * @returns {Promise<string>} Composed multi-modal prompt
 */
export async function composeMultiModalPrompt(basePrompt, context = {}, options = {}) {
  const wantsRubric = options.includeRubric !== false;
  const notes = options.temporalNotes || null;

  // Prefer the caller-supplied persona; otherwise build one from the context.
  let activePersona = options.persona;
  if (!activePersona) {
    if (context.persona) {
      activePersona = {
        name: context.persona,
        perspective: context.perspective,
        focus: context.focus || []
      };
    } else {
      activePersona = null;
    }
  }

  const code = context.renderedCode || null;
  const state = context.gameState || null;
  const activeGoal = context.goal || options.goal || null; // variable goals

  // Copy of the context marked so the game state is not printed twice.
  const dedupedContext = {
    ...context,
    gameStateAlreadyIncluded: true
  };

  const composed = await composePrompt(basePrompt, {
    includeRubric: wantsRubric,
    temporalNotes: notes,
    persona: activePersona,
    renderedCode: code,
    gameState: state,
    isMultiImage: false,
    isComparison: false,
    goal: activeGoal,
    context: dedupedContext
  });
  return composed;
}
374
+
375
+