@arclabs561/ai-visual-test 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.secretsignore.example +20 -0
- package/CHANGELOG.md +360 -0
- package/CONTRIBUTING.md +63 -0
- package/DEPLOYMENT.md +80 -0
- package/LICENSE +22 -0
- package/README.md +142 -0
- package/SECURITY.md +108 -0
- package/api/health.js +34 -0
- package/api/validate.js +252 -0
- package/index.d.ts +1221 -0
- package/package.json +112 -0
- package/public/index.html +149 -0
- package/src/batch-optimizer.mjs +451 -0
- package/src/bias-detector.mjs +370 -0
- package/src/bias-mitigation.mjs +233 -0
- package/src/cache.mjs +433 -0
- package/src/config.mjs +268 -0
- package/src/constants.mjs +80 -0
- package/src/context-compressor.mjs +350 -0
- package/src/convenience.mjs +617 -0
- package/src/cost-tracker.mjs +257 -0
- package/src/cross-modal-consistency.mjs +170 -0
- package/src/data-extractor.mjs +232 -0
- package/src/dynamic-few-shot.mjs +140 -0
- package/src/dynamic-prompts.mjs +361 -0
- package/src/ensemble/index.mjs +53 -0
- package/src/ensemble-judge.mjs +366 -0
- package/src/error-handler.mjs +67 -0
- package/src/errors.mjs +167 -0
- package/src/experience-propagation.mjs +128 -0
- package/src/experience-tracer.mjs +487 -0
- package/src/explanation-manager.mjs +299 -0
- package/src/feedback-aggregator.mjs +248 -0
- package/src/game-goal-prompts.mjs +478 -0
- package/src/game-player.mjs +548 -0
- package/src/hallucination-detector.mjs +155 -0
- package/src/helpers/playwright.mjs +80 -0
- package/src/human-validation-manager.mjs +516 -0
- package/src/index.mjs +364 -0
- package/src/judge.mjs +929 -0
- package/src/latency-aware-batch-optimizer.mjs +192 -0
- package/src/load-env.mjs +159 -0
- package/src/logger.mjs +55 -0
- package/src/metrics.mjs +187 -0
- package/src/model-tier-selector.mjs +221 -0
- package/src/multi-modal/index.mjs +36 -0
- package/src/multi-modal-fusion.mjs +190 -0
- package/src/multi-modal.mjs +524 -0
- package/src/natural-language-specs.mjs +1071 -0
- package/src/pair-comparison.mjs +277 -0
- package/src/persona/index.mjs +42 -0
- package/src/persona-enhanced.mjs +200 -0
- package/src/persona-experience.mjs +572 -0
- package/src/position-counterbalance.mjs +140 -0
- package/src/prompt-composer.mjs +375 -0
- package/src/render-change-detector.mjs +583 -0
- package/src/research-enhanced-validation.mjs +436 -0
- package/src/retry.mjs +152 -0
- package/src/rubrics.mjs +231 -0
- package/src/score-tracker.mjs +277 -0
- package/src/smart-validator.mjs +447 -0
- package/src/spec-config.mjs +106 -0
- package/src/spec-templates.mjs +347 -0
- package/src/specs/index.mjs +38 -0
- package/src/temporal/index.mjs +102 -0
- package/src/temporal-adaptive.mjs +163 -0
- package/src/temporal-batch-optimizer.mjs +222 -0
- package/src/temporal-constants.mjs +69 -0
- package/src/temporal-context.mjs +49 -0
- package/src/temporal-decision-manager.mjs +271 -0
- package/src/temporal-decision.mjs +669 -0
- package/src/temporal-errors.mjs +58 -0
- package/src/temporal-note-pruner.mjs +173 -0
- package/src/temporal-preprocessor.mjs +543 -0
- package/src/temporal-prompt-formatter.mjs +219 -0
- package/src/temporal-validation.mjs +159 -0
- package/src/temporal.mjs +415 -0
- package/src/type-guards.mjs +311 -0
- package/src/uncertainty-reducer.mjs +470 -0
- package/src/utils/index.mjs +175 -0
- package/src/validation-framework.mjs +321 -0
- package/src/validation-result-normalizer.mjs +64 -0
- package/src/validation.mjs +243 -0
- package/src/validators/accessibility-programmatic.mjs +345 -0
- package/src/validators/accessibility-validator.mjs +223 -0
- package/src/validators/batch-validator.mjs +143 -0
- package/src/validators/hybrid-validator.mjs +268 -0
- package/src/validators/index.mjs +34 -0
- package/src/validators/prompt-builder.mjs +218 -0
- package/src/validators/rubric.mjs +85 -0
- package/src/validators/state-programmatic.mjs +260 -0
- package/src/validators/state-validator.mjs +291 -0
- package/vercel.json +27 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hybrid Validator
|
|
3
|
+
*
|
|
4
|
+
* Combines programmatic validation with VLLM evaluation.
|
|
5
|
+
* Programmatic data provides ground truth, VLLM provides semantic reasoning.
|
|
6
|
+
*
|
|
7
|
+
* This follows the PROVE framework pattern: programmatic verification + LLM evaluation.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { validateScreenshot } from '../judge.mjs';
|
|
11
|
+
import { ValidationError } from '../errors.mjs';
|
|
12
|
+
import { assertString, assertObject } from '../type-guards.mjs';
|
|
13
|
+
import {
|
|
14
|
+
checkAllTextContrast,
|
|
15
|
+
checkKeyboardNavigation
|
|
16
|
+
} from './accessibility-programmatic.mjs';
|
|
17
|
+
import {
|
|
18
|
+
validateStateProgrammatic
|
|
19
|
+
} from './state-programmatic.mjs';
|
|
20
|
+
|
|
21
|
+
// Test seam: when set, the hybrid validators call this instead of the real validateScreenshot.
let injectedValidateScreenshot = null;

/**
 * Inject a validateScreenshot replacement for testing.
 *
 * Passing `null` or `undefined` clears the override. Any other non-function
 * value is rejected immediately, so a bad mock is reported at the injection
 * site rather than later as an opaque "is not a function" error inside a
 * validator.
 *
 * @internal
 * @param {Function|null|undefined} fn - Mock validateScreenshot function
 * @throws {TypeError} If fn is neither a function nor null/undefined
 */
export function _injectValidateScreenshot(fn) {
  if (fn != null && typeof fn !== 'function') {
    throw new TypeError(
      `_injectValidateScreenshot expects a function, received ${typeof fn}`
    );
  }
  injectedValidateScreenshot = fn ?? null;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Clear any injected validateScreenshot override.
 *
 * After this call the module-level override is `null` again.
 *
 * @internal
 * @returns {void}
 */
export function _resetValidateScreenshot() {
  injectedValidateScreenshot = null;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Resolve the screenshot judge to call.
 *
 * @returns {Function} The injected override when one is set (truthy),
 *   otherwise the imported validateScreenshot.
 */
function getValidateScreenshot() {
  if (injectedValidateScreenshot) {
    return injectedValidateScreenshot;
  }
  return validateScreenshot;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Hybrid accessibility validation
 *
 * Runs the programmatic contrast and keyboard checks against the page, embeds
 * their results in a prompt as ground truth, and asks the VLLM judge to
 * evaluate the screenshot with that context.
 *
 * @param {any} page - Playwright page object (must expose an `evaluate` function)
 * @param {string} screenshotPath - Path to screenshot
 * @param {number} minContrast - Minimum contrast ratio, a finite number in [1, 21] (default: 4.5)
 * @param {object} options - Validation options, forwarded to the judge as context
 * @param {string} [options.testType] - Test type label (default: 'accessibility-hybrid')
 * @returns {Promise<import('../index.mjs').ValidationResult & {programmaticData: object}>}
 *   The judge result with `programmaticData` ({ contrast, keyboard }) attached
 * @throws {ValidationError} If inputs are invalid
 */
export async function validateAccessibilityHybrid(
  page,
  screenshotPath,
  minContrast = 4.5,
  options = {}
) {
  // Validate inputs
  if (!page || typeof page.evaluate !== 'function') {
    throw new ValidationError('validateAccessibilityHybrid requires a Playwright Page object', {
      received: typeof page,
      hasEvaluate: typeof page?.evaluate === 'function'
    });
  }

  assertString(screenshotPath, 'screenshotPath');

  // Number.isFinite rejects NaN as well as non-numbers; a bare typeof/range
  // check lets NaN through because every comparison with NaN is false.
  if (!Number.isFinite(minContrast) || minContrast < 1 || minContrast > 21) {
    throw new ValidationError('minContrast must be a number between 1 and 21', {
      received: minContrast
    });
  }

  // Extract programmatic data. The two checks are awaited one after the other
  // (not in parallel) because they operate on the same page object.
  const contrast = await checkAllTextContrast(page, minContrast);
  const keyboard = await checkKeyboardNavigation(page);
  const programmaticData = { contrast, keyboard };

  // Only the first five contrast violations are listed to keep the prompt short.
  const contrastSection = contrast.violations.length > 0
    ? `\nTop violations:\n${contrast.violations
      .slice(0, 5)
      .map(v => `  - ${v.element}: ${v.ratio}:1 (required: ${v.required}:1)`)
      .join('\n')}\n`
    : '';

  const keyboardSection = keyboard.violations.length > 0
    ? `\nViolations:\n${keyboard.violations
      .map(v => `  - ${v.element}: ${v.issue}`)
      .join('\n')}\n`
    : '';

  // Build prompt with programmatic context
  const prompt = `
ACCESSIBILITY EVALUATION

PROGRAMMATIC DATA (GROUND TRUTH):
- Contrast: ${contrast.passing}/${contrast.total} elements pass (required: ${minContrast}:1)
- Violations: ${contrast.failing} elements fail
${contrastSection}
- Keyboard: ${keyboard.focusableElements} focusable elements
${keyboardSection}

EVALUATION TASK:
Use this programmatic data as ground truth (no hallucinations about measurements).
Evaluate semantic aspects:
1. Is contrast adequate for readability in context? (ratio alone doesn't tell you if it's readable)
2. Are contrast violations critical or minor? (some violations might be acceptable in context)
3. Is keyboard navigation intuitive? (semantic evaluation beyond just focusable elements)
4. Does overall accessibility support user goals? (holistic evaluation)
5. Are there accessibility issues that programmatic checks don't capture? (visual, semantic, contextual)

Provide actionable recommendations based on both programmatic and semantic analysis.
`;

  // VLLM evaluation with programmatic grounding.
  // testType is set after the spread so that an options object carrying an
  // empty/undefined testType key cannot erase the default label.
  const result = await getValidateScreenshot()(screenshotPath, prompt, {
    minContrast,
    ...options,
    testType: options.testType || 'accessibility-hybrid',
    programmaticData
  });

  return {
    ...result,
    programmaticData
  };
}
|
|
128
|
+
|
|
129
|
+
/**
 * Hybrid state validation
 *
 * Reads `window.gameState` from the page, runs the programmatic state check
 * against `expectedState`, embeds both in a prompt as ground truth, and asks
 * the VLLM judge to evaluate the screenshot with that context.
 *
 * @param {any} page - Playwright page object (must expose an `evaluate` function)
 * @param {string} screenshotPath - Path to screenshot
 * @param {object} expectedState - Expected state object
 * @param {object} options - Validation options, forwarded to the judge as context
 * @param {object} [options.selectors] - Map of state keys to CSS selectors (default: {})
 * @param {number} [options.tolerance] - Pixel tolerance (default: 5; an explicit 0 is honoured)
 * @param {string} [options.testType] - Test type label (default: 'state-hybrid')
 * @returns {Promise<import('../index.mjs').ValidationResult & {programmaticData: object}>}
 *   The judge result with `programmaticData`
 *   ({ gameState, visualState, discrepancies, matches }) attached
 * @throws {ValidationError} If inputs are invalid
 */
export async function validateStateHybrid(
  page,
  screenshotPath,
  expectedState,
  options = {}
) {
  // Validate inputs
  if (!page || typeof page.evaluate !== 'function') {
    throw new ValidationError('validateStateHybrid requires a Playwright Page object', {
      received: typeof page,
      hasEvaluate: typeof page?.evaluate === 'function'
    });
  }

  assertString(screenshotPath, 'screenshotPath');
  assertObject(expectedState, 'expectedState');

  const selectors = options.selectors || {};
  // `??` rather than `||`: a tolerance of 0 (exact match) is a legitimate
  // request and must not be replaced by the default.
  const tolerance = options.tolerance ?? 5;

  // Extract the page's game state separately so it can be shown in the prompt
  // and returned in programmaticData.
  const gameState = await page.evaluate(() => window.gameState || null);

  // Per the original author's note, validateStateProgrammatic reports a
  // mismatch via `matches: false` rather than throwing by default.
  const programmaticResult = await validateStateProgrammatic(
    page,
    expectedState,
    { selectors, tolerance }
  );

  const programmaticData = {
    gameState,
    visualState: programmaticResult.visualState,
    discrepancies: programmaticResult.discrepancies,
    matches: programmaticResult.matches
  };

  // Build prompt with programmatic context
  const prompt = `
STATE CONSISTENCY EVALUATION

PROGRAMMATIC DATA (GROUND TRUTH):
${gameState ? `Game State: ${JSON.stringify(gameState, null, 2)}` : 'Game State: Not available'}
Visual State: ${JSON.stringify(programmaticResult.visualState, null, 2)}
Expected State: ${JSON.stringify(expectedState, null, 2)}
${programmaticResult.discrepancies.length > 0 ? `
Discrepancies: ${programmaticResult.discrepancies.join(', ')}
` : 'No discrepancies found (programmatic check passed)'}

EVALUATION TASK:
Use this programmatic data as ground truth (no hallucinations about positions/state).
Evaluate semantic aspects:
1. Does visual representation match programmatic state? (semantic check beyond exact positions)
2. Is game state consistent with gameplay? (context-aware evaluation)
3. Are there visual bugs that state data doesn't capture? (holistic evaluation)
4. Is the state transition smooth and coherent? (temporal/contextual evaluation)
5. Are discrepancies critical or acceptable? (context-aware criticality assessment)

Provide actionable recommendations based on both programmatic and semantic analysis.
`;

  // VLLM evaluation with programmatic grounding.
  // testType is set after the spread so that an options object carrying an
  // empty/undefined testType key cannot erase the default label.
  const result = await getValidateScreenshot()(screenshotPath, prompt, {
    expectedState,
    ...options,
    testType: options.testType || 'state-hybrid',
    programmaticData
  });

  return {
    ...result,
    programmaticData
  };
}
|
|
222
|
+
|
|
223
|
+
/**
 * Generic hybrid validator helper
 *
 * Appends caller-supplied programmatic data (pretty-printed as JSON) and a
 * fixed set of evaluation instructions to the base prompt, then runs the VLLM
 * judge on the screenshot.
 *
 * @param {string} screenshotPath - Path to screenshot
 * @param {string} prompt - Base evaluation prompt
 * @param {object} programmaticData - Programmatic validation data
 * @param {object} options - Validation options, forwarded to the judge as context
 * @returns {Promise<import('../index.mjs').ValidationResult & {programmaticData: object}>}
 *   The judge result with `programmaticData` attached
 */
export async function validateWithProgrammaticContext(
  screenshotPath,
  prompt,
  programmaticData,
  options = {}
) {
  assertString(screenshotPath, 'screenshotPath');
  assertString(prompt, 'prompt');
  assertObject(programmaticData, 'programmaticData');

  // Base prompt first, then the ground-truth data, then the instructions.
  const enhancedPrompt = `
${prompt}

PROGRAMMATIC DATA (GROUND TRUTH):
${JSON.stringify(programmaticData, null, 2)}

EVALUATION INSTRUCTIONS:
- Use programmatic data as ground truth (no hallucinations about measurements)
- Evaluate semantic aspects: context, criticality, usability, consistency
- Report any discrepancies between programmatic data and visual appearance
- Provide actionable recommendations based on both programmatic and semantic analysis
`;

  // programmaticData is listed last so it wins over any same-named key in options.
  const judgeContext = { ...options, programmaticData };
  const judge = getValidateScreenshot();
  const evaluation = await judge(screenshotPath, enhancedPrompt, judgeContext);

  return { ...evaluation, programmaticData };
}
|
|
268
|
+
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Validators Sub-Module
|
|
3
|
+
*
|
|
4
|
+
* All validation-related functionality grouped together.
|
|
5
|
+
*
|
|
6
|
+
* Import from '@arclabs561/ai-visual-test/validators'
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// Re-export everything from validators
|
|
10
|
+
export { StateValidator } from './state-validator.mjs';
|
|
11
|
+
export { AccessibilityValidator } from './accessibility-validator.mjs';
|
|
12
|
+
export { PromptBuilder } from './prompt-builder.mjs';
|
|
13
|
+
export { validateWithRubric } from './rubric.mjs';
|
|
14
|
+
export { BatchValidator } from './batch-validator.mjs';
|
|
15
|
+
|
|
16
|
+
// Programmatic validators (fast, deterministic)
|
|
17
|
+
export {
|
|
18
|
+
getContrastRatio,
|
|
19
|
+
checkElementContrast,
|
|
20
|
+
checkAllTextContrast,
|
|
21
|
+
checkKeyboardNavigation
|
|
22
|
+
} from './accessibility-programmatic.mjs';
|
|
23
|
+
|
|
24
|
+
export {
|
|
25
|
+
validateStateProgrammatic,
|
|
26
|
+
validateElementPosition
|
|
27
|
+
} from './state-programmatic.mjs';
|
|
28
|
+
|
|
29
|
+
// Hybrid validators (programmatic + VLLM)
|
|
30
|
+
export {
|
|
31
|
+
validateAccessibilityHybrid,
|
|
32
|
+
validateStateHybrid,
|
|
33
|
+
validateWithProgrammaticContext
|
|
34
|
+
} from './hybrid-validator.mjs';
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompt Builder
|
|
3
|
+
*
|
|
4
|
+
* Template-based prompt construction with rubric integration
|
|
5
|
+
*
|
|
6
|
+
* Provides:
|
|
7
|
+
* - Reusable prompt templates
|
|
8
|
+
* - Rubric integration
|
|
9
|
+
* - Context injection
|
|
10
|
+
* - Variable substitution
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { buildRubricPrompt } from '../rubrics.mjs';
|
|
14
|
+
import { ValidationError } from '../errors.mjs';
|
|
15
|
+
|
|
16
|
+
/**
 * Generic prompt builder with template support.
 *
 * Holds a registry of named templates plus an optional default rubric and
 * default context, and renders prompts from either a raw string
 * (`buildPrompt`) or a registered template (`buildFromTemplate`).
 */
export class PromptBuilder {
  /**
   * @param {object} [options]
   * @param {object} [options.templates] - Map of template name to a template
   *   string, or to a function `(variables) => string`
   * @param {object|null} [options.rubric] - Default rubric appended by buildPrompt
   * @param {object} [options.defaultContext] - Context merged beneath per-call context
   * @throws {ValidationError} If templates or defaultContext is provided but
   *   is null, an array, or not an object
   */
  constructor(options = {}) {
    this.templates = PromptBuilder._plainObjectOrEmpty(options.templates, 'templates');
    this.rubric = options.rubric || null;
    this.defaultContext = PromptBuilder._plainObjectOrEmpty(options.defaultContext, 'defaultContext');
  }

  /**
   * Return `value` if it is a non-null, non-array object, `{}` if it is
   * undefined, and throw otherwise.
   * @private
   */
  static _plainObjectOrEmpty(value, label) {
    if (value === undefined) {
      return {};
    }
    if (typeof value !== 'object' || value === null || Array.isArray(value)) {
      throw new ValidationError(
        `${label} must be a non-null object`,
        { received: typeof value }
      );
    }
    return value;
  }

  /**
   * Build prompt with optional rubric and context.
   *
   * @param {string} basePrompt - Prompt text to extend
   * @param {object} [options]
   * @param {object} [options.rubric] - Rubric for this call (falls back to the builder's rubric)
   * @param {boolean} [options.includeZeroTolerance] - Passed to buildRubricPrompt (default: true)
   * @param {boolean} [options.includeScoring] - Passed to buildRubricPrompt (default: true)
   * @param {boolean} [options.enforceZeroTolerance] - Append the enforcement notice when the
   *   rubric has a zero-tolerance criterion (default: true)
   * @param {object} [options.context] - Context merged over the builder's defaultContext
   * @returns {string} The extended prompt
   */
  buildPrompt(basePrompt, options = {}) {
    let prompt = basePrompt;

    // Add rubric if provided
    const rubric = options.rubric || this.rubric;
    if (rubric) {
      const rubricPrompt = buildRubricPrompt(rubric, {
        includeZeroTolerance: options.includeZeroTolerance !== false,
        includeScoring: options.includeScoring !== false
      });
      prompt = `${prompt}\n\n${rubricPrompt}`;

      // Add zero tolerance enforcement if applicable. The Array.isArray guard
      // keeps a rubric whose `criteria` is not a list from throwing here.
      const hasZeroTolerance = Array.isArray(rubric.criteria)
        && rubric.criteria.some(c => c?.zeroTolerance);
      if (options.enforceZeroTolerance !== false && hasZeroTolerance) {
        prompt = `${prompt}\n\nZERO TOLERANCE ENFORCEMENT:\n`
          + 'Any zero tolerance violation results in automatic failure.\n'
          + 'Score is automatically set to 0 if any zero tolerance violation is detected.';
      }
    }

    // Add context if provided (per-call keys override default keys)
    const context = { ...this.defaultContext, ...options.context };
    if (Object.keys(context).length > 0) {
      prompt = `${prompt}\n\nCONTEXT:\n${JSON.stringify(context, null, 2)}`;
    }

    return prompt;
  }

  /**
   * Build prompt from template with support for conditionals and loops
   *
   * Supports:
   * - Variables: {{variable}} and {{object.property}}
   * - Conditionals: {{#if condition}}...{{else}}...{{/if}}, {{#unless condition}}...{{/unless}}
   * - Loops: {{#each items}}...{{/each}} (with @index, @first, @last, @value)
   * - Nested templates: {{>templateName}}
   *
   * Conditionals inside a loop body are evaluated against the current item.
   * Block matching is non-greedy, so a block nested inside another block of
   * the same kind is not supported.
   *
   * @param {string} templateName - Name of a registered template
   * @param {object} [variables] - Values for substitution
   * @param {object} [options] - Options forwarded to buildPrompt; `skipRubric: true`
   *   returns the rendered template without rubric/context
   * @returns {string} The rendered prompt
   * @throws {ValidationError} If the template is missing, is not a string (or a
   *   function returning one), or includes itself through partials
   */
  buildFromTemplate(templateName, variables = {}, options = {}) {
    const prompt = this._renderTemplate(templateName, variables, []);

    // Apply rubric and context (unless skipRubric is set)
    if (options.skipRubric) {
      return prompt;
    }
    return this.buildPrompt(prompt, options);
  }

  /**
   * Look up a registered template by name, ignoring inherited properties so
   * that names such as "constructor" do not resolve to Object.prototype members.
   * @private
   */
  _lookupTemplate(name) {
    return Object.prototype.hasOwnProperty.call(this.templates, name)
      ? this.templates[name]
      : undefined;
  }

  /**
   * Render one template (partials, loops, conditionals, variables) without
   * applying rubric or context.
   *
   * @private
   * @param {string} templateName - Template to render
   * @param {object} variables - Values for substitution
   * @param {string[]} activePartials - Names of templates currently being
   *   rendered further up the partial chain; used to detect cycles
   */
  _renderTemplate(templateName, variables, activePartials) {
    const template = this._lookupTemplate(templateName);
    if (!template) {
      throw new ValidationError(
        `Template "${templateName}" not found. Available templates: ${Object.keys(this.templates).join(', ') || 'none'}`,
        { templateName, availableTemplates: Object.keys(this.templates) }
      );
    }

    // Get template string
    let templateStr = typeof template === 'function'
      ? template(variables)
      : template;

    if (typeof templateStr !== 'string') {
      throw new ValidationError(
        `Template "${templateName}" must be a string or a function returning a string`,
        { templateName, received: typeof templateStr }
      );
    }

    const chain = [...activePartials, templateName];

    // Process nested templates (partials) first: {{>templateName}}
    templateStr = templateStr.replace(/\{\{>([^}]+)\}\}/g, (match, partialName) => {
      const trimmedName = partialName.trim();
      if (!this._lookupTemplate(trimmedName)) {
        return match; // Return original if partial not found
      }
      // A partial that (transitively) includes itself would recurse forever.
      if (chain.includes(trimmedName)) {
        throw new ValidationError(
          `Circular partial reference: ${[...chain, trimmedName].join(' -> ')}`,
          { templateName: trimmedName, chain }
        );
      }
      return this._renderTemplate(trimmedName, variables, chain);
    });

    // Process loops: {{#each items}}...{{/each}}
    templateStr = templateStr.replace(/\{\{#each\s+([^}]+)\}\}([\s\S]*?)\{\{\/each\}\}/g, (match, arrayKey, loopBody) => {
      const items = variables[arrayKey.trim()];
      if (!Array.isArray(items)) {
        return ''; // Return empty if not an array
      }
      return items.map((item, index) => {
        const itemVars = {
          ...variables,
          '@index': index,
          '@first': index === 0,
          '@last': index === items.length - 1
        };
        // If item is an object, merge its properties; otherwise expose it as @value
        if (typeof item === 'object' && item !== null) {
          Object.assign(itemVars, item);
        } else {
          itemVars['@value'] = item;
        }
        // Resolve conditionals against the item scope before the outer pass
        // sees them, then substitute the remaining variables.
        const withConditionals = this._processConditionals(loopBody, itemVars);
        return this._processTemplate(withConditionals, itemVars);
      }).join('');
    });

    // Process top-level conditionals, then variables: {{variable}}
    templateStr = this._processConditionals(templateStr, variables);
    return this._processTemplate(templateStr, variables);
  }

  /**
   * Template truthiness: everything except undefined, null, false and the
   * empty string counts as true (so 0 and empty arrays are truthy).
   * @private
   */
  _isTruthy(value) {
    return value !== undefined && value !== null && value !== false && value !== '';
  }

  /**
   * Resolve {{#if}}...{{else}}...{{/if}} and {{#unless}}...{{/unless}} blocks
   * against `variables`. The condition is looked up as a direct key of
   * `variables` (no dot notation).
   * @private
   */
  _processConditionals(templateStr, variables) {
    const afterIf = templateStr.replace(/\{\{#if\s+([^}]+)\}\}([\s\S]*?)\{\{\/if\}\}/g, (match, conditionKey, body) => {
      const isTruthy = this._isTruthy(variables[conditionKey.trim()]);

      // Check for {{else}} block
      const elseMatch = body.match(/^([\s\S]*?)\{\{else\}\}([\s\S]*)$/);
      if (elseMatch) {
        const chosen = isTruthy ? elseMatch[1] : elseMatch[2];
        return this._processTemplate(chosen, variables);
      }

      return isTruthy ? this._processTemplate(body, variables) : '';
    });

    return afterIf.replace(/\{\{#unless\s+([^}]+)\}\}([\s\S]*?)\{\{\/unless\}\}/g, (match, conditionKey, body) => {
      const isTruthy = this._isTruthy(variables[conditionKey.trim()]);
      return isTruthy ? '' : this._processTemplate(body, variables);
    });
  }

  /**
   * Internal method to process template variables.
   *
   * Replaces each {{key}} with String(value); placeholders whose key (or dot
   * path) cannot be resolved are left untouched.
   * @private
   */
  _processTemplate(templateStr, variables) {
    return templateStr.replace(/\{\{([^}]+)\}\}/g, (match, key) => {
      const trimmedKey = key.trim();
      // Support dot notation: {{object.property}}
      if (trimmedKey.includes('.')) {
        let value = variables;
        for (const part of trimmedKey.split('.')) {
          if (value && typeof value === 'object' && part in value) {
            value = value[part];
          } else {
            return match; // Return original if path not found
          }
        }
        return value !== undefined ? String(value) : match;
      }
      return variables[trimmedKey] !== undefined ? String(variables[trimmedKey]) : match;
    });
  }

  /**
   * Register a template
   *
   * @param {string} name - Template name used by buildFromTemplate and {{>name}}
   * @param {string|Function} template - Template string, or function `(variables) => string`
   */
  registerTemplate(name, template) {
    this.templates[name] = template;
  }
}
|
|
218
|
+
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rubric System
|
|
3
|
+
*
|
|
4
|
+
* Generic rubric-based validation with zero tolerance support
|
|
5
|
+
*
|
|
6
|
+
* Provides:
|
|
7
|
+
* - Rubric-based validation
|
|
8
|
+
* - Zero tolerance violation enforcement
|
|
9
|
+
* - Research-enhanced validation integration
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { validateWithResearchEnhancements } from '../research-enhanced-validation.mjs';
|
|
13
|
+
import { PromptBuilder } from './prompt-builder.mjs';
|
|
14
|
+
import { ValidationError } from '../errors.mjs';
|
|
15
|
+
import { assertString, assertObject } from '../type-guards.mjs';
|
|
16
|
+
|
|
17
|
+
/**
 * Validate with rubric (generic, not project-specific)
 *
 * Builds a rubric-augmented prompt, runs research-enhanced validation on the
 * screenshot, and forces a failing result when a zero-tolerance violation is
 * detected among the reported issues.
 *
 * A string issue counts as a zero-tolerance violation when it contains
 * "zero tolerance" or "instant fail" (case-insensitive), or contains the id of
 * a rubric criterion flagged `zeroTolerance`. Criteria without a usable id
 * (missing or empty) are skipped for id matching.
 *
 * @param {string} screenshotPath - Path to screenshot
 * @param {string} prompt - Base evaluation prompt
 * @param {object} rubric - Rubric; must have `score.criteria`. An optional
 *   `criteria` array may flag entries with `zeroTolerance`
 * @param {object} [context] - Validation context forwarded to the validator
 *   (testType defaults to 'rubric-validation')
 * @param {object} [options] - Prompt-building options forwarded to PromptBuilder.buildPrompt
 * @param {boolean} [options.enforceZeroTolerance] - Set to false to disable
 *   zero-tolerance enforcement (default: enabled)
 * @returns {Promise<object>} The validation result; on a zero-tolerance
 *   violation `score` is 0, `assessment` is 'fail' and `zeroToleranceViolation` is true
 * @throws {ValidationError} If inputs are invalid or validation fails
 */
export async function validateWithRubric(screenshotPath, prompt, rubric, context = {}, options = {}) {
  // Input validation
  assertString(screenshotPath, 'screenshotPath');
  assertString(prompt, 'prompt');
  assertObject(rubric, 'rubric');

  if (!rubric.score || !rubric.score.criteria) {
    throw new ValidationError(
      'Rubric must have score.criteria property',
      { rubric: Object.keys(rubric) }
    );
  }

  const enforceZeroTolerance = options.enforceZeroTolerance !== false;

  const builder = new PromptBuilder({ rubric });
  const enhancedPrompt = builder.buildPrompt(prompt, {
    ...options,
    enforceZeroTolerance
  });

  try {
    const result = await validateWithResearchEnhancements(
      screenshotPath,
      enhancedPrompt,
      {
        ...context,
        rubric,
        testType: context.testType || 'rubric-validation'
      }
    );

    // Check for zero tolerance violations if rubric has them
    const zeroToleranceCriteria = Array.isArray(rubric.criteria)
      ? rubric.criteria.filter(c => c?.zeroTolerance)
      : [];

    if (zeroToleranceCriteria.length > 0 && enforceZeroTolerance) {
      // Only ids that can meaningfully appear in an issue string are matched.
      // A missing id would otherwise be coerced to the text "undefined", and
      // an empty id would match every issue.
      const zeroToleranceIds = zeroToleranceCriteria
        .map(c => c.id)
        .filter(id => (typeof id === 'string' && id !== '') || typeof id === 'number')
        .map(String);

      // Ensure issues is an array before calling .some()
      const issues = Array.isArray(result.issues) ? result.issues : [];
      const hasZeroToleranceViolation = issues.some(issue => {
        if (typeof issue !== 'string') {
          return false;
        }
        const lowered = issue.toLowerCase();
        return lowered.includes('zero tolerance')
          || lowered.includes('instant fail')
          || zeroToleranceIds.some(id => issue.includes(id));
      });

      if (hasZeroToleranceViolation) {
        return {
          ...result,
          score: 0,
          assessment: 'fail',
          zeroToleranceViolation: true
        };
      }
    }

    return result;
  } catch (error) {
    // Re-throw ValidationError as-is, wrap others
    if (error instanceof ValidationError) {
      throw error;
    }
    const wrapped = new ValidationError(
      `Rubric validation failed: ${error.message}`,
      { screenshotPath, rubricName: rubric.name, originalError: error.message }
    );
    // Keep the original error (and its stack) reachable for debugging.
    wrapped.cause = error;
    throw wrapped;
  }
}
|
|
85
|
+
|