@arclabs561/ai-visual-test 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.secretsignore.example +20 -0
- package/CHANGELOG.md +360 -0
- package/CONTRIBUTING.md +63 -0
- package/DEPLOYMENT.md +80 -0
- package/LICENSE +22 -0
- package/README.md +142 -0
- package/SECURITY.md +108 -0
- package/api/health.js +34 -0
- package/api/validate.js +252 -0
- package/index.d.ts +1221 -0
- package/package.json +112 -0
- package/public/index.html +149 -0
- package/src/batch-optimizer.mjs +451 -0
- package/src/bias-detector.mjs +370 -0
- package/src/bias-mitigation.mjs +233 -0
- package/src/cache.mjs +433 -0
- package/src/config.mjs +268 -0
- package/src/constants.mjs +80 -0
- package/src/context-compressor.mjs +350 -0
- package/src/convenience.mjs +617 -0
- package/src/cost-tracker.mjs +257 -0
- package/src/cross-modal-consistency.mjs +170 -0
- package/src/data-extractor.mjs +232 -0
- package/src/dynamic-few-shot.mjs +140 -0
- package/src/dynamic-prompts.mjs +361 -0
- package/src/ensemble/index.mjs +53 -0
- package/src/ensemble-judge.mjs +366 -0
- package/src/error-handler.mjs +67 -0
- package/src/errors.mjs +167 -0
- package/src/experience-propagation.mjs +128 -0
- package/src/experience-tracer.mjs +487 -0
- package/src/explanation-manager.mjs +299 -0
- package/src/feedback-aggregator.mjs +248 -0
- package/src/game-goal-prompts.mjs +478 -0
- package/src/game-player.mjs +548 -0
- package/src/hallucination-detector.mjs +155 -0
- package/src/helpers/playwright.mjs +80 -0
- package/src/human-validation-manager.mjs +516 -0
- package/src/index.mjs +364 -0
- package/src/judge.mjs +929 -0
- package/src/latency-aware-batch-optimizer.mjs +192 -0
- package/src/load-env.mjs +159 -0
- package/src/logger.mjs +55 -0
- package/src/metrics.mjs +187 -0
- package/src/model-tier-selector.mjs +221 -0
- package/src/multi-modal/index.mjs +36 -0
- package/src/multi-modal-fusion.mjs +190 -0
- package/src/multi-modal.mjs +524 -0
- package/src/natural-language-specs.mjs +1071 -0
- package/src/pair-comparison.mjs +277 -0
- package/src/persona/index.mjs +42 -0
- package/src/persona-enhanced.mjs +200 -0
- package/src/persona-experience.mjs +572 -0
- package/src/position-counterbalance.mjs +140 -0
- package/src/prompt-composer.mjs +375 -0
- package/src/render-change-detector.mjs +583 -0
- package/src/research-enhanced-validation.mjs +436 -0
- package/src/retry.mjs +152 -0
- package/src/rubrics.mjs +231 -0
- package/src/score-tracker.mjs +277 -0
- package/src/smart-validator.mjs +447 -0
- package/src/spec-config.mjs +106 -0
- package/src/spec-templates.mjs +347 -0
- package/src/specs/index.mjs +38 -0
- package/src/temporal/index.mjs +102 -0
- package/src/temporal-adaptive.mjs +163 -0
- package/src/temporal-batch-optimizer.mjs +222 -0
- package/src/temporal-constants.mjs +69 -0
- package/src/temporal-context.mjs +49 -0
- package/src/temporal-decision-manager.mjs +271 -0
- package/src/temporal-decision.mjs +669 -0
- package/src/temporal-errors.mjs +58 -0
- package/src/temporal-note-pruner.mjs +173 -0
- package/src/temporal-preprocessor.mjs +543 -0
- package/src/temporal-prompt-formatter.mjs +219 -0
- package/src/temporal-validation.mjs +159 -0
- package/src/temporal.mjs +415 -0
- package/src/type-guards.mjs +311 -0
- package/src/uncertainty-reducer.mjs +470 -0
- package/src/utils/index.mjs +175 -0
- package/src/validation-framework.mjs +321 -0
- package/src/validation-result-normalizer.mjs +64 -0
- package/src/validation.mjs +243 -0
- package/src/validators/accessibility-programmatic.mjs +345 -0
- package/src/validators/accessibility-validator.mjs +223 -0
- package/src/validators/batch-validator.mjs +143 -0
- package/src/validators/hybrid-validator.mjs +268 -0
- package/src/validators/index.mjs +34 -0
- package/src/validators/prompt-builder.mjs +218 -0
- package/src/validators/rubric.mjs +85 -0
- package/src/validators/state-programmatic.mjs +260 -0
- package/src/validators/state-validator.mjs +291 -0
- package/vercel.json +27 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Dynamic Few-Shot Example Selection
|
|
3
|
+
*
|
|
4
|
+
* Research: ES-KNN (semantically similar in-context examples) outperforms
|
|
5
|
+
* random examples. Examples should be semantically similar to evaluation task.
|
|
6
|
+
*
|
|
7
|
+
* Papers:
|
|
8
|
+
* - ES-KNN: arXiv:2506.05614 (Exemplar Selection KNN using semantic similarity)
|
|
9
|
+
* - KATE: arXiv:2101.06804 (Foundational work on kNN-augmented in-context examples)
|
|
10
|
+
*
|
|
11
|
+
* Note: This implementation uses keyword-based similarity (Jaccard) rather than
|
|
12
|
+
* true semantic embeddings due to npm package constraints. For full ES-KNN,
|
|
13
|
+
* embedding-based cosine similarity would be required.
|
|
14
|
+
*
|
|
15
|
+
* This module provides dynamic few-shot example selection based on similarity
|
|
16
|
+
* to the evaluation prompt.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
/**
 * Pick the few-shot examples most relevant to an evaluation prompt.
 *
 * Relevance is the Jaccard overlap (shared keywords / all distinct keywords)
 * between the prompt's keywords and the keywords of each example's
 * `description` + `evaluation` text, as produced by `extractKeywords`.
 * No embeddings are involved.
 *
 * Ranking is skipped, and the first `maxExamples` entries are returned in
 * their original order, when the prompt is not a string, when
 * `useSemanticMatching` is false, or when there are no more than
 * `maxExamples` candidates. A missing, non-array or empty `examples` yields [].
 *
 * @param {string} prompt - Evaluation prompt
 * @param {Array<import('./index.mjs').FewShotExample>} examples - Available examples
 * @param {{
 *   maxExamples?: number;
 *   similarityThreshold?: number;
 *   useSemanticMatching?: boolean;
 * }} [options={}] - Selection options (defaults: 3, 0.3, true)
 * @returns {Array<import('./index.mjs').FewShotExample>} Selected examples; highest overlap first when ranked
 */
export function selectFewShotExamples(prompt, examples = [], options = {}) {
  const {
    maxExamples: limit = 3,
    similarityThreshold: minScore = 0.3,
    useSemanticMatching: rankByKeywords = true
  } = options;

  if (!Array.isArray(examples) || examples.length === 0) {
    return [];
  }

  // All three conditions lead to the same plain "first N" selection.
  const skipRanking =
    typeof prompt !== 'string' || !rankByKeywords || examples.length <= limit;
  if (skipRanking) {
    return examples.slice(0, limit);
  }

  const promptTerms = extractKeywords(prompt.toLowerCase());
  const ranked = [];

  for (const example of examples) {
    const descriptionText = example.description || '';
    const evaluationText = example.evaluation || '';
    const exampleTerms = extractKeywords(descriptionText + ' ' + evaluationText);

    let shared = 0;
    for (const term of promptTerms) {
      if (exampleTerms.has(term)) {
        shared += 1;
      }
    }

    // |A ∪ B| = |A| + |B| - |A ∩ B| for two sets.
    const unionSize = promptTerms.size + exampleTerms.size - shared;
    const similarity = unionSize > 0 ? shared / unionSize : 0;

    if (similarity >= minScore) {
      ranked.push({ example, similarity });
    }
  }

  ranked.sort((left, right) => right.similarity - left.similarity);
  return ranked.slice(0, limit).map(entry => entry.example);
}
|
|
82
|
+
|
|
83
|
+
/**
 * Common English function words that are never treated as keywords.
 * Built once at module load rather than on every extractKeywords() call.
 */
const KEYWORD_STOP_WORDS = new Set([
  'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
  'of', 'with', 'by', 'from', 'is', 'are', 'was', 'were', 'be', 'been',
  'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'should',
  'could', 'may', 'might', 'must', 'can', 'this', 'that', 'these', 'those'
]);

/**
 * Extract a set of lowercase keywords from free text (simple implementation).
 *
 * A keyword is a run of at least three ASCII letters or digits delimited by
 * word boundaries, lowercased, and not present in the stop-word list. Digits
 * are accepted so that tokens such as "wcag21" or "1080p" are kept; a
 * letters-only pattern dropped such tokens entirely.
 *
 * Because `\b` treats `_` as a word character, tokens containing an
 * underscore (e.g. "foo_bar") produce no keyword.
 *
 * @param {string} text - Text to extract keywords from
 * @returns {Set<string>} Set of keywords; empty for non-string or empty input
 */
function extractKeywords(text) {
  if (typeof text !== 'string' || text.length === 0) {
    return new Set();
  }

  // Extract words (alphanumeric, at least 3 chars)
  const tokens = text.match(/\b[a-z0-9]{3,}\b/gi) ?? [];

  return new Set(
    tokens
      .map(token => token.toLowerCase())
      .filter(token => !KEYWORD_STOP_WORDS.has(token))
  );
}
|
|
107
|
+
|
|
108
|
+
/**
 * Format few-shot examples as text to embed in a prompt.
 *
 * - `format === 'json'`: a "### Few-Shot Examples:" section in which every
 *   example is pretty-printed as a fenced JSON block.
 * - any other value: one entry per example with a header line
 *   (quality + score), a "Screenshot:" line, an "Evaluation:" line and, when
 *   the example has a `result` or `json` value, a "JSON:" line. Without one,
 *   the entry ends with an empty line.
 *
 * Field fallbacks in the default format: score = `score`, then `result.score`,
 * then 'N/A'; screenshot text = `description`, then `screenshot`, then
 * 'Screenshot'; evaluation = `evaluation`, then `result.reasoning`, then ''.
 *
 * @param {Array<import('./index.mjs').FewShotExample>} examples - Examples to format
 * @param {string} [format='default'] - Format style ('json' or default)
 * @returns {string} Formatted examples text; '' when `examples` is empty or not an array
 */
export function formatFewShotExamples(examples, format = 'default') {
  // A missing or non-array list means "no examples" rather than a TypeError.
  if (!Array.isArray(examples) || examples.length === 0) {
    return '';
  }

  if (format === 'json') {
    // JSON format for structured output
    const blocks = examples.map((ex, i) =>
      `**Example ${i + 1}**\n\`\`\`json\n${JSON.stringify(ex, null, 2)}\n\`\`\``
    );
    return `\n\n### Few-Shot Examples:\n\n${blocks.join('\n\n')}`;
  }

  // Default format (matches rubrics.mjs style)
  return examples.map((ex, i) => {
    const score = ex.score ?? ex.result?.score ?? 'N/A';
    const desc = ex.description || ex.screenshot || 'Screenshot';
    const evaluation = ex.evaluation || ex.result?.reasoning || '';
    // `result` takes precedence over `json` when both are present.
    const payload = ex.result || ex.json;
    const json = payload ? JSON.stringify(payload, null, 2) : '';

    return [
      `**Example ${i + 1} - ${ex.quality || 'Quality'} (Score: ${score})**`,
      `Screenshot: ${desc}`,
      `Evaluation: "${evaluation}"`,
      json ? `JSON: ${json}` : ''
    ].join('\n');
  }).join('\n\n');
}
|
|
140
|
+
|
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Dynamic Prompt Generation for UX and Gameplay Testing
|
|
3
|
+
*
|
|
4
|
+
* Generates context-aware prompts based on:
|
|
5
|
+
* - Page state (initial load, form filled, game active, etc.)
|
|
6
|
+
* - User interactions (click, type, scroll, etc.)
|
|
7
|
+
* - Testing goals (UX improvement, gameplay validation, accessibility, etc.)
|
|
8
|
+
* - Persona perspectives
|
|
9
|
+
*
|
|
10
|
+
* Prompts adapt to the current context for more relevant and actionable feedback.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Generate a dynamic prompt from the testing context.
 *
 * The prompt is five sections joined by blank lines and then trimmed:
 * base prompt, current context, persona, experience timeline, and specific
 * questions. Sections that do not apply are left empty, so the result can
 * contain consecutive blank lines.
 *
 * @param {Object} [context={}] - Testing context (omitted or null is treated as empty)
 * @param {string} [context.stage='initial'] - Current stage ('initial', 'form', 'payment', 'gameplay', etc.)
 * @param {Object} [context.interaction=null] - Last interaction (if any)
 * @param {Object} [context.pageState={}] - Current page state
 * @param {Object} [context.gameState=null] - Game state (if applicable)
 * @param {string} [context.testingGoal='ux-improvement'] - Testing goal ('ux-improvement', 'gameplay', 'accessibility', etc.)
 * @param {Object} [context.persona=null] - Persona configuration (optional)
 * @param {Array} [context.experienceNotes=[]] - Previous experience notes (optional; non-array values are ignored)
 * @returns {string} Generated prompt
 */
export function generateDynamicPrompt(context = {}) {
  const {
    stage = 'initial',
    interaction = null,
    pageState = {},
    gameState = null,
    testingGoal = 'ux-improvement',
    persona = null,
    experienceNotes = []
  } = context ?? {};

  // Destructuring defaults only cover `undefined`; an explicit null (or any
  // other non-array) must not reach `.length` / `.slice`.
  const notes = Array.isArray(experienceNotes) ? experienceNotes : [];

  // Base prompt components, in output order
  const sections = [
    buildBasePrompt(stage, testingGoal),
    buildContextPrompt(stage, interaction, pageState, gameState),
    persona ? buildPersonaPrompt(persona) : '',
    notes.length > 0 ? buildExperiencePrompt(notes) : '',
    buildSpecificQuestions(stage, testingGoal, gameState)
  ];

  return sections.join('\n\n').trim();
}
|
|
54
|
+
|
|
55
|
+
/**
 * Build the opening instruction of a prompt for a stage / testing-goal pair.
 *
 * Tailored wording exists for the stages 'initial', 'form', 'payment' and
 * 'gameplay', each combined with the goals 'ux-improvement', 'gameplay',
 * 'accessibility' and 'performance'. Any other combination gets a generic
 * sentence built from the two values.
 *
 * @param {string} stage - Current stage
 * @param {string} testingGoal - Testing goal
 * @returns {string} Base prompt sentence(s)
 */
function buildBasePrompt(stage, testingGoal) {
  const initialPrompts = {
    'ux-improvement': 'Evaluate the initial page load from a UX perspective. Focus on first impressions, clarity of purpose, and immediate usability.',
    'gameplay': 'Evaluate the initial page state. Is the game accessible? Is the purpose clear? Can users understand how to start?',
    'accessibility': 'Evaluate accessibility of the initial page. Check for screen reader compatibility, keyboard navigation, color contrast, and semantic HTML.',
    'performance': 'Evaluate the initial page load performance. Check for layout shifts, loading states, and perceived performance.'
  };

  const formPrompts = {
    'ux-improvement': 'Evaluate the form interaction experience. Is the form intuitive? Are labels clear? Is there helpful feedback?',
    'gameplay': 'Evaluate form completion in context of accessing gameplay. Is the flow clear? Does it feel like a barrier or a natural step?',
    'accessibility': 'Evaluate form accessibility. Check label associations, error messages, keyboard navigation, and ARIA attributes.',
    'performance': 'Evaluate form interaction performance. Check for input lag, validation feedback speed, and form submission responsiveness.'
  };

  const paymentPrompts = {
    'ux-improvement': 'Evaluate the payment screen UX. Is the payment code clear? Are payment links obvious? Is the flow trustworthy?',
    'gameplay': 'Evaluate payment screen in context of gameplay access. Is the connection between payment and game clear? Is the wait reasonable?',
    'accessibility': 'Evaluate payment screen accessibility. Can screen readers access payment codes? Are links keyboard accessible?',
    'performance': 'Evaluate payment screen performance. Is the code visible immediately? Are links responsive?'
  };

  const gameplayPrompts = {
    'ux-improvement': 'Evaluate gameplay UX. Is the game fun? Are controls intuitive? Is feedback clear?',
    'gameplay': 'Evaluate gameplay mechanics. Is the game balanced? Are controls responsive? Is the experience engaging?',
    'accessibility': 'Evaluate gameplay accessibility. Can the game be played with keyboard? Are visual indicators clear? Is there audio feedback?',
    'performance': 'Evaluate gameplay performance. Is the frame rate smooth? Are there lag spikes? Is the game responsive?'
  };

  const promptsByStage = {
    initial: initialPrompts,
    form: formPrompts,
    payment: paymentPrompts,
    gameplay: gameplayPrompts
  };

  const tailored = promptsByStage[stage]?.[testingGoal];
  if (tailored) {
    return tailored;
  }

  // No tailored wording for this combination: fall back to a generic sentence.
  return `Evaluate the ${stage} stage from a ${testingGoal} perspective.`;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Build the "CURRENT CONTEXT" section of a prompt.
 *
 * Always lists the stage. Adds, when available: the last interaction (type,
 * target, and ISO timestamp), page title / active element / viewport, and a
 * game-state summary (only while `gameState.gameActive` is truthy).
 *
 * @param {string} stage - Current stage
 * @param {Object|null} interaction - Last interaction ({ type, target?, timestamp? })
 * @param {Object|null} pageState - Page state ({ title?, activeElement?, viewport? })
 * @param {Object|null} gameState - Game state ({ gameActive, bricks?, ball?, paddle?, score? })
 * @returns {string} Context section, one item per line
 */
function buildContextPrompt(stage, interaction, pageState, gameState) {
  const lines = ['CURRENT CONTEXT:', `- Stage: ${stage}`];

  if (interaction) {
    let interactionLine = `- Last interaction: ${interaction.type} on ${interaction.target || 'unknown'}`;
    if (interaction.timestamp) {
      const when = new Date(interaction.timestamp);
      // toISOString() throws RangeError on an invalid Date; an unparseable
      // timestamp must not abort prompt generation, so it is simply omitted.
      if (!Number.isNaN(when.getTime())) {
        interactionLine += ` at ${when.toISOString()}`;
      }
    }
    lines.push(interactionLine);
  }

  if (pageState) {
    if (pageState.title) lines.push(`- Page title: ${pageState.title}`);
    if (pageState.activeElement) lines.push(`- Active element: ${pageState.activeElement}`);
    if (pageState.viewport) lines.push(`- Viewport: ${pageState.viewport.width}x${pageState.viewport.height}`);
  }

  if (gameState && gameState.gameActive) {
    // NOTE(review): the nested bullets carry exactly the whitespace seen in
    // the reviewed source (none, except one space before "- Score") —
    // confirm no indentation was lost upstream.
    lines.push(
      '- Game state:',
      `- Active: ${gameState.gameActive}`,
      `- Bricks remaining: ${gameState.bricks?.length || 0}`,
      `- Ball position: ${gameState.ball ? 'visible' : 'not visible'}`,
      `- Paddle position: ${gameState.paddle ? 'visible' : 'not visible'}`
    );
    if (gameState.score !== undefined) lines.push(` - Score: ${gameState.score}`);
  }

  return lines.join('\n');
}
|
|
121
|
+
|
|
122
|
+
/**
 * Build the "PERSONA PERSPECTIVE" section of a prompt.
 *
 * Layout: a header line with the persona name, the persona's perspective
 * text (empty line when absent), a "Focus areas:" line (empty line when
 * `focus` is falsy; arrays are joined with ", "), a blank line, and a closing
 * instruction.
 *
 * @param {Object|null} persona - Persona ({ name, perspective?, focus? })
 * @returns {string} Persona section, or '' when no persona is given
 */
function buildPersonaPrompt(persona) {
  if (!persona) {
    return '';
  }

  const { name, perspective, focus } = persona;

  let focusLine = '';
  if (focus) {
    const areas = Array.isArray(focus) ? focus.join(', ') : focus;
    focusLine = `Focus areas: ${areas}`;
  }

  return [
    `PERSONA PERSPECTIVE: ${name}`,
    `${perspective || ''}`,
    focusLine,
    '',
    "Evaluate from this persona's perspective."
  ].join('\n');
}
|
|
134
|
+
|
|
135
|
+
/**
 * Build the "EXPERIENCE TIMELINE" section from the five most recent notes.
 *
 * Each note becomes one line: a label followed by the note's `observation`,
 * else its `step`, else the literal 'step'. The label is the elapsed time in
 * seconds with one decimal (`elapsed` is divided by 1000) when the note has a
 * usable `elapsed` value, otherwise the note's 1-based position within the
 * recent slice.
 *
 * An `elapsed` of exactly 0 is a valid time and is rendered as "0.0s"; a
 * plain truthiness check would mislabel it with the position number.
 *
 * @param {Array<{ elapsed?: number, observation?: string, step?: string }>} experienceNotes - Notes, oldest first
 * @returns {string} Timeline section, or '' when there are no notes
 */
function buildExperiencePrompt(experienceNotes) {
  if (!Array.isArray(experienceNotes) || experienceNotes.length === 0) return '';

  const recentNotes = experienceNotes.slice(-5); // Last 5 notes
  const timeline = recentNotes.map((note, index) => {
    const elapsedMs = note?.elapsed;
    // Numbers count when finite (including 0); any other type keeps the
    // original truthiness rule.
    const hasElapsed = typeof elapsedMs === 'number'
      ? Number.isFinite(elapsedMs)
      : Boolean(elapsedMs);
    const label = hasElapsed ? `${(elapsedMs / 1000).toFixed(1)}s` : `${index + 1}`;
    // NOTE(review): single leading space as seen in the reviewed source —
    // confirm no indentation was lost upstream.
    return ` ${label}: ${note?.observation || note?.step || 'step'}`;
  }).join('\n');

  return [
    'EXPERIENCE TIMELINE (recent):',
    timeline,
    '',
    "Consider how the current state relates to the user's journey so far."
  ].join('\n');
}
|
|
152
|
+
|
|
153
|
+
/**
 * Build the numbered "SPECIFIC QUESTIONS TO ANSWER" section.
 *
 * Stage questions come first ('initial', 'form', 'payment', or 'gameplay'
 * when a game state is supplied), followed by testing-goal questions
 * ('ux-improvement', 'gameplay', 'accessibility'). Other stages and goals
 * contribute nothing.
 *
 * @param {string} stage - Current stage
 * @param {string} testingGoal - Testing goal
 * @param {Object|null} gameState - Game state; only `bricks` is read here
 * @returns {string} Question section, or '' when no question applies
 */
function buildSpecificQuestions(stage, testingGoal, gameState) {
  // Stage-specific questions
  let stageQuestions = [];
  switch (stage) {
    case 'initial':
      stageQuestions = [
        'What is the first thing a user notices?',
        'Is the purpose of the page immediately clear?',
        'What action should the user take first?'
      ];
      break;
    case 'form':
      stageQuestions = [
        'Are all form fields clearly labeled?',
        'Is there helpful placeholder text or instructions?',
        'Does the form provide feedback during interaction?',
        'Is the submit button clearly visible and enabled?'
      ];
      break;
    case 'payment':
      stageQuestions = [
        'Is the payment code easy to read and copy?',
        'Are payment links (Venmo, CashApp) clearly visible?',
        'Is the connection between payment and game access clear?',
        'Does the screen feel trustworthy?'
      ];
      break;
    case 'gameplay':
      // Gameplay questions need a game state to decide the final question.
      if (gameState) {
        const boardCleared = gameState.bricks?.length === 0;
        stageQuestions = [
          'Is the game visually engaging?',
          'Are controls intuitive and responsive?',
          'Is there clear visual feedback for actions?',
          'Is the game balanced and fun?',
          boardCleared
            ? 'Did the user win? Is there celebration feedback?'
            : 'How many bricks remain? Is progress clear?'
        ];
      }
      break;
    default:
      break;
  }

  // Testing goal-specific questions
  let goalQuestions = [];
  switch (testingGoal) {
    case 'ux-improvement':
      goalQuestions = [
        'What would improve the user experience?',
        'Are there any friction points?',
        'Is the flow intuitive?',
        'What would make this more delightful?'
      ];
      break;
    case 'gameplay':
      goalQuestions = [
        'Is the game fun and engaging?',
        'Are controls responsive?',
        'Is there clear feedback for actions?',
        'Is the difficulty appropriate?',
        'Would you want to play again?'
      ];
      break;
    case 'accessibility':
      goalQuestions = [
        'Can this be used with a screen reader?',
        'Is keyboard navigation possible?',
        'Is color contrast sufficient?',
        'Are interactive elements clearly indicated?',
        'Are error messages accessible?'
      ];
      break;
    default:
      break;
  }

  const questions = [...stageQuestions, ...goalQuestions];
  if (questions.length === 0) {
    return '';
  }

  const numbered = questions.map((question, index) => `${index + 1}. ${question}`);
  return `SPECIFIC QUESTIONS TO ANSWER:\n${numbered.join('\n')}`;
}
|
|
221
|
+
|
|
222
|
+
/**
 * Generate multiple prompt variations for A/B testing different perspectives.
 *
 * Each variation is turned into a prompt by calling generateDynamicPrompt()
 * with the shared context, overriding `testingGoal` and `persona` when the
 * variation supplies them. When no variations are given, four defaults are
 * used (ux-improvement, accessibility, gameplay, performance).
 *
 * A variation's `focus` is copied to the result entry; this function does
 * not pass it to the prompt generator.
 *
 * @param {Record<string, unknown>} context - Testing context (null/undefined is treated as empty)
 * @param {Array<{ testingGoal?: string; focus?: string; persona?: Object }>} [variations=[]] - Array of variation configs
 * @returns {Array<{ testingGoal?: string; focus?: string; persona?: Object; prompt: string }>}
 *   One entry per variation: the variation's own fields plus the generated `prompt`
 */
export function generatePromptVariations(context, variations = []) {
  const defaultVariations = [
    { testingGoal: 'ux-improvement', focus: 'user experience and delight' },
    { testingGoal: 'accessibility', focus: 'inclusive design and accessibility' },
    { testingGoal: 'gameplay', focus: 'game mechanics and engagement' },
    { testingGoal: 'performance', focus: 'perceived performance and responsiveness' }
  ];

  // Default parameters only cover `undefined`; treat null and non-array values
  // like an empty list instead of throwing on `.length` / `.map`.
  const baseContext = context ?? {};
  const variationsToUse = Array.isArray(variations) && variations.length > 0
    ? variations
    : defaultVariations;

  return variationsToUse.map(variation => ({
    ...variation,
    prompt: generateDynamicPrompt({
      ...baseContext,
      testingGoal: variation?.testingGoal || baseContext.testingGoal,
      persona: variation?.persona || baseContext.persona
    })
  }));
}
|
|
255
|
+
|
|
256
|
+
/**
 * Generate a prompt that asks for an evaluation of a single interaction.
 *
 * The prompt names the interaction (known types are mapped to a friendlier
 * description, unknown types are used verbatim), embeds the before/after
 * states as pretty-printed JSON, and ends with five questions. The fifth
 * question depends on the testing goal: 'ux-improvement' and 'accessibility'
 * each have their own wording, every other goal asks about responsiveness.
 *
 * @param {Record<string, unknown>} interaction - Interaction details (`type` is read)
 * @param {Record<string, unknown>} beforeState - State before interaction
 * @param {Record<string, unknown>} afterState - State after interaction
 * @param {string} [testingGoal='ux-improvement'] - Testing goal
 * @returns {string} Interaction-specific prompt
 */
export function generateInteractionPrompt(interaction, beforeState, afterState, testingGoal = 'ux-improvement') {
  const friendlyNames = {
    click: 'button or link click',
    type: 'text input',
    scroll: 'page scroll',
    submit: 'form submission',
    keypress: 'keyboard input'
  };
  const interactionLabel = friendlyNames[interaction.type] || interaction.type;

  const beforeJson = JSON.stringify(beforeState, null, 2);
  const afterJson = JSON.stringify(afterState, null, 2);

  let closingQuestion;
  if (testingGoal === 'ux-improvement') {
    closingQuestion = 'How could this interaction be improved?';
  } else if (testingGoal === 'accessibility') {
    closingQuestion = 'Was this interaction accessible?';
  } else {
    closingQuestion = 'Was this interaction responsive?';
  }

  // The JSON values are interpolated (not passed raw to join) so that an
  // `undefined` result is rendered as the text "undefined".
  return [
    `Evaluate the ${interactionLabel} interaction.`,
    '',
    'BEFORE INTERACTION:',
    `${beforeJson}`,
    '',
    'AFTER INTERACTION:',
    `${afterJson}`,
    '',
    'QUESTIONS:',
    '1. Was the interaction successful?',
    '2. Was there clear visual feedback?',
    '3. Did the state change as expected?',
    '4. Was there any delay or lag?',
    `5. ${closingQuestion}`
  ].join('\n');
}
|
|
293
|
+
|
|
294
|
+
/**
 * Generate a gameplay-specific prompt.
 *
 * On every call the function tries to load `./game-goal-prompts.mjs`. When
 * that module provides `generateGamePrompt` and `goalOrPrompt` is anything
 * other than one of the legacy focus names ('mechanics', 'visuals',
 * 'feedback', 'balance'), its result is returned directly. Otherwise a legacy
 * prompt is assembled here from the focus text, the current game state and,
 * when given, the previous state plus the number of bricks destroyed.
 *
 * If the module cannot be loaded, the legacy prompt is always used.
 *
 * @param {Record<string, unknown>} gameState - Current game state
 * @param {Record<string, unknown> | null} [previousState=null] - Previous game state (optional)
 * @param {string | Object | Array | Function} [goalOrPrompt='mechanics'] - Goal, prompt, or focus area
 * @param {Object} [context={}] - Additional context forwarded to generateGamePrompt
 * @returns {Promise<string>} Resolves to the gameplay prompt; the legacy path always produces a
 *   string, the delegated path resolves to whatever generateGamePrompt returns
 */
export async function generateGameplayPrompt(gameState, previousState = null, goalOrPrompt = 'mechanics', context = {}) {
  const legacyFocusPrompts = {
    mechanics: 'Evaluate gameplay mechanics. Are controls responsive? Is collision detection accurate? Is the game balanced?',
    visuals: 'Evaluate visual design. Are colors vibrant? Is the layout clear? Are animations smooth?',
    feedback: 'Evaluate feedback systems. Are collisions visible? Is score clear? Are there celebration effects?',
    balance: 'Evaluate game balance. Is difficulty appropriate? Is progression clear? Is the game engaging?'
  };
  const legacyFocusNames = ['mechanics', 'visuals', 'feedback', 'balance'];

  // Lazy import to avoid circular dependencies; a load failure deliberately
  // falls back to the legacy prompts below.
  let variableGoalPrompt = null;
  try {
    ({ generateGamePrompt: variableGoalPrompt } = await import('./game-goal-prompts.mjs'));
  } catch {
    variableGoalPrompt = null;
  }

  const isLegacyFocus =
    typeof goalOrPrompt === 'string' && legacyFocusNames.includes(goalOrPrompt);
  if (variableGoalPrompt && !isLegacyFocus) {
    return variableGoalPrompt(goalOrPrompt, {
      gameState,
      previousState,
      ...context
    });
  }

  // Legacy support for simple focus strings
  const focus = typeof goalOrPrompt === 'string' ? goalOrPrompt : 'mechanics';
  const lines = [
    `${legacyFocusPrompts[focus] || legacyFocusPrompts.mechanics}`,
    '',
    'CURRENT GAME STATE:',
    `- Active: ${gameState.gameActive}`,
    `- Bricks: ${gameState.bricks?.length || 0} remaining`,
    `- Ball: ${gameState.ball ? 'visible' : 'not visible'}`,
    `- Paddle: ${gameState.paddle ? 'visible' : 'not visible'}`
  ];

  if (gameState.score !== undefined) {
    lines.push(`- Score: ${gameState.score}`);
  }

  if (previousState) {
    const previousBricks = previousState.bricks?.length || 0;
    lines.push(
      '',
      'PREVIOUS STATE:',
      `- Bricks: ${previousBricks}`,
      `- Score: ${previousState.score || 0}`
    );

    const bricksDestroyed = previousBricks - (gameState.bricks?.length || 0);
    if (bricksDestroyed > 0) {
      lines.push('', `Progress: ${bricksDestroyed} brick(s) destroyed since last check.`);
    }
  }

  return lines.join('\n');
}
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ensemble Sub-Module
|
|
3
|
+
*
|
|
4
|
+
* Ensemble judging, bias detection, and counter-balancing.
|
|
5
|
+
*
|
|
6
|
+
* Import from '@arclabs561/ai-visual-test/ensemble'
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// Ensemble judging
|
|
10
|
+
export {
|
|
11
|
+
EnsembleJudge,
|
|
12
|
+
createEnsembleJudge
|
|
13
|
+
} from '../ensemble-judge.mjs';
|
|
14
|
+
|
|
15
|
+
// Bias detection
|
|
16
|
+
export {
|
|
17
|
+
detectBias,
|
|
18
|
+
detectPositionBias
|
|
19
|
+
} from '../bias-detector.mjs';
|
|
20
|
+
|
|
21
|
+
// Bias mitigation
|
|
22
|
+
export {
|
|
23
|
+
applyBiasMitigation,
|
|
24
|
+
mitigateBias,
|
|
25
|
+
mitigatePositionBias
|
|
26
|
+
} from '../bias-mitigation.mjs';
|
|
27
|
+
|
|
28
|
+
// Position counter-balance
|
|
29
|
+
export {
|
|
30
|
+
evaluateWithCounterBalance,
|
|
31
|
+
shouldUseCounterBalance
|
|
32
|
+
} from '../position-counterbalance.mjs';
|
|
33
|
+
|
|
34
|
+
// Pair comparison
|
|
35
|
+
export {
|
|
36
|
+
comparePair,
|
|
37
|
+
rankBatch
|
|
38
|
+
} from '../pair-comparison.mjs';
|
|
39
|
+
|
|
40
|
+
// Hallucination detection
|
|
41
|
+
export {
|
|
42
|
+
detectHallucination
|
|
43
|
+
} from '../hallucination-detector.mjs';
|
|
44
|
+
|
|
45
|
+
// Research-enhanced validation
|
|
46
|
+
export {
|
|
47
|
+
validateWithResearchEnhancements,
|
|
48
|
+
validateMultipleWithPositionAnalysis,
|
|
49
|
+
validateWithLengthAlignment,
|
|
50
|
+
validateWithExplicitRubric,
|
|
51
|
+
validateWithAllResearchEnhancements
|
|
52
|
+
} from '../research-enhanced-validation.mjs';
|
|
53
|
+
|