@arclabs561/ai-visual-test 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.secretsignore.example +20 -0
- package/CHANGELOG.md +360 -0
- package/CONTRIBUTING.md +63 -0
- package/DEPLOYMENT.md +80 -0
- package/LICENSE +22 -0
- package/README.md +142 -0
- package/SECURITY.md +108 -0
- package/api/health.js +34 -0
- package/api/validate.js +252 -0
- package/index.d.ts +1221 -0
- package/package.json +112 -0
- package/public/index.html +149 -0
- package/src/batch-optimizer.mjs +451 -0
- package/src/bias-detector.mjs +370 -0
- package/src/bias-mitigation.mjs +233 -0
- package/src/cache.mjs +433 -0
- package/src/config.mjs +268 -0
- package/src/constants.mjs +80 -0
- package/src/context-compressor.mjs +350 -0
- package/src/convenience.mjs +617 -0
- package/src/cost-tracker.mjs +257 -0
- package/src/cross-modal-consistency.mjs +170 -0
- package/src/data-extractor.mjs +232 -0
- package/src/dynamic-few-shot.mjs +140 -0
- package/src/dynamic-prompts.mjs +361 -0
- package/src/ensemble/index.mjs +53 -0
- package/src/ensemble-judge.mjs +366 -0
- package/src/error-handler.mjs +67 -0
- package/src/errors.mjs +167 -0
- package/src/experience-propagation.mjs +128 -0
- package/src/experience-tracer.mjs +487 -0
- package/src/explanation-manager.mjs +299 -0
- package/src/feedback-aggregator.mjs +248 -0
- package/src/game-goal-prompts.mjs +478 -0
- package/src/game-player.mjs +548 -0
- package/src/hallucination-detector.mjs +155 -0
- package/src/helpers/playwright.mjs +80 -0
- package/src/human-validation-manager.mjs +516 -0
- package/src/index.mjs +364 -0
- package/src/judge.mjs +929 -0
- package/src/latency-aware-batch-optimizer.mjs +192 -0
- package/src/load-env.mjs +159 -0
- package/src/logger.mjs +55 -0
- package/src/metrics.mjs +187 -0
- package/src/model-tier-selector.mjs +221 -0
- package/src/multi-modal/index.mjs +36 -0
- package/src/multi-modal-fusion.mjs +190 -0
- package/src/multi-modal.mjs +524 -0
- package/src/natural-language-specs.mjs +1071 -0
- package/src/pair-comparison.mjs +277 -0
- package/src/persona/index.mjs +42 -0
- package/src/persona-enhanced.mjs +200 -0
- package/src/persona-experience.mjs +572 -0
- package/src/position-counterbalance.mjs +140 -0
- package/src/prompt-composer.mjs +375 -0
- package/src/render-change-detector.mjs +583 -0
- package/src/research-enhanced-validation.mjs +436 -0
- package/src/retry.mjs +152 -0
- package/src/rubrics.mjs +231 -0
- package/src/score-tracker.mjs +277 -0
- package/src/smart-validator.mjs +447 -0
- package/src/spec-config.mjs +106 -0
- package/src/spec-templates.mjs +347 -0
- package/src/specs/index.mjs +38 -0
- package/src/temporal/index.mjs +102 -0
- package/src/temporal-adaptive.mjs +163 -0
- package/src/temporal-batch-optimizer.mjs +222 -0
- package/src/temporal-constants.mjs +69 -0
- package/src/temporal-context.mjs +49 -0
- package/src/temporal-decision-manager.mjs +271 -0
- package/src/temporal-decision.mjs +669 -0
- package/src/temporal-errors.mjs +58 -0
- package/src/temporal-note-pruner.mjs +173 -0
- package/src/temporal-preprocessor.mjs +543 -0
- package/src/temporal-prompt-formatter.mjs +219 -0
- package/src/temporal-validation.mjs +159 -0
- package/src/temporal.mjs +415 -0
- package/src/type-guards.mjs +311 -0
- package/src/uncertainty-reducer.mjs +470 -0
- package/src/utils/index.mjs +175 -0
- package/src/validation-framework.mjs +321 -0
- package/src/validation-result-normalizer.mjs +64 -0
- package/src/validation.mjs +243 -0
- package/src/validators/accessibility-programmatic.mjs +345 -0
- package/src/validators/accessibility-validator.mjs +223 -0
- package/src/validators/batch-validator.mjs +143 -0
- package/src/validators/hybrid-validator.mjs +268 -0
- package/src/validators/index.mjs +34 -0
- package/src/validators/prompt-builder.mjs +218 -0
- package/src/validators/rubric.mjs +85 -0
- package/src/validators/state-programmatic.mjs +260 -0
- package/src/validators/state-validator.mjs +291 -0
- package/vercel.json +27 -0
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cost Tracking Utilities
|
|
3
|
+
*
|
|
4
|
+
* Tracks API costs over time, provides cost estimates, and helps optimize spending.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { getCached, setCached } from './cache.mjs';
|
|
8
|
+
|
|
9
|
+
/**
 * Cost Tracker Class
 *
 * Tracks costs across multiple validations and provides cost analysis.
 * State lives in `this.costs` and is persisted on a best-effort basis through
 * the cache helpers: a failing cache read falls back to an empty state and a
 * failing cache write is ignored.
 */
export class CostTracker {
  /**
   * @param {{ storageKey?: string; maxHistory?: number }} [options={}] - Tracker options
   */
  constructor(options = {}) {
    this.storageKey = options.storageKey || 'ai-visual-test-costs';
    // `||` is deliberate: a falsy maxHistory (including 0) falls back to 1000.
    this.maxHistory = options.maxHistory || 1000;
    this.costs = this.loadCosts();
  }

  /**
   * Load costs from cache/storage
   *
   * Whatever the cache returns is normalized so that `history`, `totals` and
   * `byProvider` always exist with the expected container type; other cached
   * keys (such as `byDate`) are carried over unchanged.
   *
   * @returns {{ history: Array<object>; totals: object; byProvider: object; byDate?: object }} Cost state
   */
  loadCosts() {
    let cached = null;
    try {
      cached = getCached(this.storageKey, 'cost-tracker', {});
    } catch {
      // Cache unavailable - start from an empty state
    }

    const isRecord = (value) =>
      value !== null && typeof value === 'object' && !Array.isArray(value);
    const source = isRecord(cached) ? cached : {};

    return {
      ...source,
      history: Array.isArray(source.history) ? source.history : [],
      totals: isRecord(source.totals) ? source.totals : {},
      byProvider: isRecord(source.byProvider) ? source.byProvider : {}
    };
  }

  /**
   * Save costs to cache/storage
   */
  saveCosts() {
    try {
      setCached(this.storageKey, 'cost-tracker', this.costs, {});
    } catch {
      // Silently fail if cache unavailable
    }
  }

  /**
   * Record a cost
   *
   * Entries whose `cost` is missing or is not a finite number are skipped so
   * they cannot turn the running totals into NaN or a concatenated string.
   * Non-finite token counts are recorded as 0. A timestamp that cannot be
   * converted to a valid date is replaced by the current time.
   *
   * @param {{
   *   provider: string;
   *   cost: number;
   *   inputTokens?: number;
   *   outputTokens?: number;
   *   timestamp?: number;
   *   testName?: string;
   * }} costData - Cost data to record
   */
  recordCost(costData) {
    const {
      provider,
      cost,
      inputTokens = 0,
      outputTokens = 0,
      timestamp = Date.now(),
      testName = 'unknown'
    } = costData ?? {};

    // Skip null/undefined costs as well as anything that is not a finite number
    if (typeof cost !== 'number' || !Number.isFinite(cost)) return;

    const safeInputTokens = Number.isFinite(inputTokens) ? inputTokens : 0;
    const safeOutputTokens = Number.isFinite(outputTokens) ? outputTokens : 0;

    // toISOString() throws a RangeError on an invalid Date, so validate first
    const parsedTime = new Date(timestamp);
    const hasValidTime = !Number.isNaN(parsedTime.getTime());
    const recordedAt = hasValidTime ? timestamp : Date.now();
    const when = hasValidTime ? parsedTime : new Date(recordedAt);

    const entry = {
      provider,
      cost,
      inputTokens: safeInputTokens,
      outputTokens: safeOutputTokens,
      timestamp: recordedAt,
      testName,
      date: when.toISOString().split('T')[0] // YYYY-MM-DD (UTC)
    };

    // Add to history
    this.costs.history.push(entry);

    // Trim history if too long (the aggregates below remain cumulative)
    if (this.costs.history.length > this.maxHistory) {
      this.costs.history = this.costs.history.slice(-this.maxHistory);
    }

    // Update totals
    this.costs.totals.total = (this.costs.totals.total || 0) + cost;
    this.costs.totals.count = (this.costs.totals.count || 0) + 1;

    // Update by provider
    if (!this.costs.byProvider[provider]) {
      this.costs.byProvider[provider] = { total: 0, count: 0, inputTokens: 0, outputTokens: 0 };
    }
    const providerTotals = this.costs.byProvider[provider];
    providerTotals.total += cost;
    providerTotals.count += 1;
    providerTotals.inputTokens += safeInputTokens;
    providerTotals.outputTokens += safeOutputTokens;

    // Update by date
    if (!this.costs.byDate) {
      this.costs.byDate = {};
    }
    if (!this.costs.byDate[entry.date]) {
      this.costs.byDate[entry.date] = { total: 0, count: 0 };
    }
    this.costs.byDate[entry.date].total += cost;
    this.costs.byDate[entry.date].count += 1;

    this.saveCosts();
  }

  /**
   * Get cost statistics
   *
   * `recent` holds the last 10 history entries. `byDate` is the tracker's own
   * aggregate object, not a copy.
   *
   * @returns {{
   *   total: number;
   *   count: number;
   *   average: number;
   *   byProvider: Record<string, { total: number; count: number; average: number; inputTokens: number; outputTokens: number }>;
   *   byDate: Record<string, { total: number; count: number }>;
   *   recent: Array<{ provider: string; cost: number; timestamp: number; testName: string }>;
   * }} Cost statistics
   */
  getStats() {
    const stats = {
      total: this.costs.totals.total || 0,
      count: this.costs.totals.count || 0,
      average: 0,
      byProvider: {},
      byDate: this.costs.byDate || {},
      recent: this.costs.history.slice(-10).map(e => ({
        provider: e.provider,
        cost: e.cost,
        timestamp: e.timestamp,
        testName: e.testName
      }))
    };

    if (stats.count > 0) {
      stats.average = stats.total / stats.count;
    }

    // Calculate averages by provider
    for (const [provider, data] of Object.entries(this.costs.byProvider)) {
      stats.byProvider[provider] = {
        total: data.total,
        count: data.count,
        average: data.count > 0 ? data.total / data.count : 0,
        inputTokens: data.inputTokens,
        outputTokens: data.outputTokens
      };
    }

    return stats;
  }

  /**
   * Get cost projection
   *
   * The daily average divides the summed per-date totals by the number of
   * dates that have at least one recorded cost (dates without entries are not
   * counted).
   *
   * @param {number} [days=30] - Number of days to project
   * @returns {{ projected: number; dailyAverage: number; trend: 'increasing' | 'decreasing' | 'stable' }} Projection
   */
  getProjection(days = 30) {
    const { byDate } = this.getStats();
    const recordedDates = Object.keys(byDate);
    const grandTotal = Object.values(byDate).reduce((sum, day) => sum + day.total, 0);
    const dailyAverage = grandTotal / Math.max(recordedDates.length, 1);

    const projected = dailyAverage * days;

    // Simple trend detection: the 7 most recent recorded dates vs the 7 before
    // them. Needs 14 recorded dates, otherwise the trend stays 'stable'.
    const dates = [...recordedDates].sort().slice(-14);
    let trend = 'stable';
    if (dates.length >= 14) {
      const sumTotals = (keys) => keys.reduce((sum, d) => sum + (byDate[d]?.total || 0), 0);
      const recent = sumTotals(dates.slice(-7));
      const previous = sumTotals(dates.slice(0, 7));
      // A change of more than 10% in either direction counts as a trend
      if (recent > previous * 1.1) trend = 'increasing';
      else if (recent < previous * 0.9) trend = 'decreasing';
    }

    return { projected, dailyAverage, trend };
  }

  /**
   * Check if cost exceeds threshold
   *
   * Reaching the threshold exactly counts as exceeded.
   *
   * @param {number} threshold - Cost threshold
   * @returns {{ exceeded: boolean; current: number; remaining: number }} Threshold check
   */
  checkThreshold(threshold) {
    const current = this.getStats().total;
    return {
      exceeded: current >= threshold,
      current,
      remaining: Math.max(0, threshold - current)
    };
  }

  /**
   * Reset cost tracking
   */
  reset() {
    this.costs = { history: [], totals: {}, byProvider: {}, byDate: {} };
    this.saveCosts();
  }

  /**
   * Export cost data
   *
   * @returns {object} Raw cost state plus `stats` and a 30-day `projection`
   */
  export() {
    return {
      ...this.costs,
      stats: this.getStats(),
      projection: this.getProjection(30)
    };
  }
}
|
|
220
|
+
|
|
221
|
+
/**
|
|
222
|
+
* Global cost tracker instance
|
|
223
|
+
*/
|
|
224
|
+
let globalCostTracker = null;
|
|
225
|
+
|
|
226
|
+
/**
 * Return the shared cost tracker, creating it on first use.
 *
 * `options` are only consulted when the shared instance does not exist yet;
 * later calls return the existing instance regardless of the options passed.
 *
 * @param {object} [options] - Cost tracker options
 * @returns {CostTracker} Cost tracker instance
 */
export function getCostTracker(options = {}) {
  if (globalCostTracker) {
    return globalCostTracker;
  }

  globalCostTracker = new CostTracker(options);
  return globalCostTracker;
}
|
|
238
|
+
|
|
239
|
+
/**
 * Record a cost on the shared tracker (convenience function)
 *
 * @param {object} costData - Cost data, forwarded unchanged to CostTracker#recordCost
 */
export function recordCost(costData) {
  getCostTracker().recordCost(costData);
}
|
|
248
|
+
|
|
249
|
+
/**
 * Read statistics from the shared tracker (convenience function)
 *
 * @returns {object} Cost statistics as produced by CostTracker#getStats
 */
export function getCostStats() {
  const tracker = getCostTracker();
  return tracker.getStats();
}
|
|
257
|
+
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cross-Modal Consistency Checker
|
|
3
|
+
*
|
|
4
|
+
* Validates consistency between screenshot (visual) and HTML/CSS (structural).
|
|
5
|
+
* Detects mismatches between what's shown visually and what the code structure indicates.
|
|
6
|
+
*
|
|
7
|
+
* Research:
|
|
8
|
+
* - "Cross-Modal Consistency in Multimodal Large Language Models" - Consistency issues in GPT-4V
|
|
9
|
+
* - "Verifying Cross-modal Entity Consistency in News using Vision-language Models" (LVLM4CEC)
|
|
10
|
+
* - "Hallucination Detection in Vision-Language Models" - Multiple papers on detecting unfaithful outputs
|
|
11
|
+
*
|
|
12
|
+
* Key findings: Consistency is a major challenge. Entity verification is critical.
|
|
13
|
+
* Hallucination detection is needed. Modality gap contributes to issues.
|
|
14
|
+
*
|
|
15
|
+
* Note: This implementation uses heuristic-based checking. Full research implementation
|
|
16
|
+
* would use VLLM-based verification and entity-level consistency checking as in LVLM4CEC.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { warn, log } from './logger.mjs';
|
|
20
|
+
|
|
21
|
+
/**
 * Check consistency between screenshot and HTML/CSS
 *
 * Runs a series of heuristic checks over the supplied inputs and collects
 * human-readable warnings. The score starts at 1 and loses 0.2 per issue and
 * 0.1 per warning, clamped at 0.
 *
 * @param {Object} options - Consistency check options
 * @param {string} [options.screenshot] - Screenshot path
 * @param {Object} [options.renderedCode] - Rendered code (HTML/CSS/DOM)
 * @param {Object} [options.gameState] - Game state
 * @param {Object} [options.pageState] - Page state
 * @param {boolean} [options.strict=false] - Strict mode (any warning makes the result inconsistent)
 * @returns {Object} Consistency check result:
 *   `{ isConsistent, issues, warnings, checks, score, summary }`
 */
export function checkCrossModalConsistency(options = {}) {
  const {
    screenshot,
    renderedCode,
    gameState,
    pageState,
    strict = false
  } = options;

  const issues = [];
  const warnings = [];
  const checks = {
    hasScreenshot: !!screenshot,
    hasRenderedCode: !!renderedCode,
    hasHTML: !!renderedCode?.html,
    hasCSS: !!renderedCode?.criticalCSS,
    hasDOM: !!renderedCode?.domStructure,
    hasGameState: !!gameState,
    hasPageState: !!pageState
  };

  // Check 1: Basic presence
  if (!screenshot && !renderedCode) {
    issues.push('Missing both screenshot and rendered code - cannot check consistency');
    return {
      isConsistent: false,
      issues,
      warnings,
      checks,
      score: 0,
      // Same shape as the regular result so callers can always read `summary`
      summary: `${issues.length} issue(s), ${warnings.length} warning(s)`
    };
  }

  // Check 2: HTML structure vs visual expectations
  if (renderedCode?.html && renderedCode?.domStructure) {
    // Check if key elements exist in DOM but might not be visible
    const domElements = Object.keys(renderedCode.domStructure);
    if (domElements.length === 0) {
      warnings.push('DOM structure is empty - may indicate missing elements');
    }

    // Check for hidden elements that should be visible
    if (renderedCode.domStructure.prideParade && !renderedCode.domStructure.prideParade.exists) {
      warnings.push('Pride parade element missing from DOM structure');
    }
  }

  // Check 3: CSS positioning vs visual layout
  if (renderedCode?.criticalCSS) {
    for (const [selector, styles] of Object.entries(renderedCode.criticalCSS)) {
      // A selector without a style object has nothing to inspect
      if (!styles || typeof styles !== 'object') continue;

      if (styles.position === 'absolute' && (styles.top === 'auto' || styles.left === 'auto')) {
        warnings.push(`Element '${selector}' has absolute positioning but auto top/left - may cause layout issues`);
      }

      if (styles.display === 'none' && selector.includes('game')) {
        warnings.push(`Game element '${selector}' has display:none - may not be visible`);
      }

      if (styles.visibility === 'hidden' && selector.includes('game')) {
        warnings.push(`Game element '${selector}' has visibility:hidden - may not be visible`);
      }
    }
  }

  // Check 4: Game state vs visual display
  if (gameState && renderedCode?.domStructure) {
    // Check if game state indicates active game but DOM doesn't show game elements
    if (gameState.gameActive && !renderedCode.domStructure.game) {
      warnings.push('Game state indicates active game but game elements not found in DOM');
    }

    // Check score consistency (if score is in game state and visible in DOM)
    // This would require VLLM to extract score from screenshot;
    // for now, strict mode just notes that it should be checked.
    if (gameState.score !== undefined && strict) {
      warnings.push('Game state has score - should verify it matches visual display');
    }
  }

  // Check 5: Page state vs rendered code
  if (pageState && renderedCode?.html) {
    // Check if page title matches
    if (pageState.title && !renderedCode.html.includes(pageState.title)) {
      warnings.push(`Page title '${pageState.title}' not found in HTML`);
    }
  }

  // Calculate consistency score
  const issueCount = issues.length;
  const warningCount = warnings.length;
  const maxIssues = 5; // Normalize to 0-1 scale; a warning weighs half an issue
  const consistencyScore = Math.max(0, 1 - (issueCount / maxIssues) - (warningCount / (maxIssues * 2)));

  const isConsistent = issueCount === 0 && (strict ? warningCount === 0 : true);

  // Log warnings if any
  if (warnings.length > 0) {
    log(`[Cross-Modal Consistency] ${warnings.length} warning(s):`, warnings);
  }

  // Issues are currently only produced by the early-return path above, so this
  // only fires if one of the later checks starts reporting issues.
  if (issues.length > 0) {
    warn(`[Cross-Modal Consistency] ${issues.length} issue(s):`, issues);
  }

  return {
    isConsistent,
    issues,
    warnings,
    checks,
    score: consistencyScore,
    summary: issueCount === 0 && warningCount === 0
      ? 'All consistency checks passed'
      : `${issueCount} issue(s), ${warningCount} warning(s)`
  };
}
|
|
153
|
+
|
|
154
|
+
/**
 * Validate experience consistency (convenience function)
 *
 * Maps an experience object onto the inputs of checkCrossModalConsistency:
 * the first screenshot's path, the rendered code, the game state (preferring
 * `pageState.gameState` over the top-level `gameState`) and the page state.
 * Entries in `options` are spread last and therefore override the mapped values.
 * A null/undefined experience is treated as an empty one instead of throwing.
 *
 * @param {Object} experience - Experience object from experiencePageAsPersona
 * @param {Object} [options={}] - Validation options
 * @returns {Object} Consistency check result
 */
export function validateExperienceConsistency(experience, options = {}) {
  const source = experience ?? {};

  return checkCrossModalConsistency({
    screenshot: source.screenshots?.[0]?.path,
    renderedCode: source.renderedCode,
    gameState: source.pageState?.gameState || source.gameState,
    pageState: source.pageState,
    ...options
  });
}
|
|
170
|
+
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured Data Extractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts structured data from VLLM responses using multiple strategies:
|
|
5
|
+
* - JSON parsing (if response contains JSON)
|
|
6
|
+
* - LLM extraction (if LLM is available)
|
|
7
|
+
* - Regex fallback (simple patterns)
|
|
8
|
+
*
|
|
9
|
+
* General-purpose utility - no domain-specific logic.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { createConfig } from './config.mjs';
|
|
13
|
+
import { loadEnv } from './load-env.mjs';
|
|
14
|
+
import { warn } from './logger.mjs';
|
|
15
|
+
// Load env before LLM utils
|
|
16
|
+
loadEnv();
|
|
17
|
+
// Use shared LLM utility library for text-only calls (optional dependency)
|
|
18
|
+
// Note: This module requests the 'advanced' model tier for data extraction
|
|
19
|
+
// which requires higher quality than simple validation tasks
|
|
20
|
+
// Import is handled dynamically to make it optional
|
|
21
|
+
|
|
22
|
+
/**
 * Extract structured data from text using multiple strategies
 *
 * Order of attempts:
 *   1. JSON embedded in the text (always tried first)
 *   2. LLM extraction, when `fallback` is 'llm' or 'auto' and the created config is enabled
 *   3. Regex extraction, when `fallback` is 'regex' or 'auto'
 *
 * With the default `fallback: 'llm'` the regex strategy is not attempted.
 * `method` and `apiKey` are accepted in the options type but are not read by
 * this function.
 *
 * @param {string} text - Text to extract data from
 * @param {Record<string, { type: string; [key: string]: unknown }>} schema - Schema definition for extraction
 * @param {{
 *   method?: 'json' | 'llm' | 'regex';
 *   provider?: string;
 *   apiKey?: string;
 *   fallback?: 'llm' | 'regex' | 'json' | 'auto';
 * }} [options={}] - Extraction options
 * @returns {Promise<unknown>} Extracted structured data matching schema, or null if extraction fails
 */
export async function extractStructuredData(text, schema, options = {}) {
  if (!text) return null;

  const {
    fallback = 'llm', // 'llm' | 'regex' | 'json' | 'auto'
    provider = null
  } = options;

  // Strategy 1: Try JSON parsing first (fastest)
  const fromJson = parseEmbeddedJson(text, schema);
  if (fromJson !== null) {
    return fromJson;
  }

  // Strategy 2: Try LLM extraction (if available and requested)
  if (fallback === 'llm' || fallback === 'auto') {
    try {
      const config = createConfig({ provider });
      if (config.enabled) {
        const extracted = await extractWithLLM(text, schema, config);
        if (extracted) {
          return extracted;
        }
      }
    } catch (error) {
      warn(`[DataExtractor] LLM extraction failed: ${error.message}`);
    }
  }

  // Strategy 3: Try regex fallback
  if (fallback === 'regex' || fallback === 'auto') {
    try {
      const extracted = extractWithRegex(text, schema);
      if (extracted) {
        return extracted;
      }
    } catch (error) {
      warn(`[DataExtractor] Regex extraction failed: ${error.message}`);
    }
  }

  return null;
}

/**
 * Find the first JSON object embedded in `text` that parses and satisfies the schema.
 *
 * The widest span (first `{` to last `}`) is tried first. Because that span
 * breaks as soon as the text contains a second object or any stray brace after
 * the JSON, every balanced top-level `{...}` span is tried afterwards.
 *
 * @returns {object | null} The parsed object, or null when no candidate qualifies
 */
function parseEmbeddedJson(text, schema) {
  if (typeof text !== 'string') return null;

  const widest = text.match(/\{[\s\S]*\}/);
  if (!widest) return null;

  const candidates = [
    widest[0],
    ...findBalancedObjectSpans(text).filter((span) => span !== widest[0])
  ];

  for (const candidate of candidates) {
    try {
      const parsed = JSON.parse(candidate);
      if (validateSchema(parsed, schema)) {
        return parsed;
      }
    } catch {
      // Not valid JSON (or not checkable against the schema) - try the next candidate
    }
  }

  return null;
}

/**
 * Collect every balanced top-level `{...}` span in `text`, in order of appearance.
 * Braces inside double-quoted strings within a span are ignored.
 *
 * @returns {string[]} Candidate substrings
 */
function findBalancedObjectSpans(text) {
  const spans = [];
  let depth = 0;
  let start = -1;
  let inString = false;
  let escaped = false;

  for (let i = 0; i < text.length; i += 1) {
    const ch = text[i];

    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    if (ch === '"') {
      // Quotes only matter inside an object; prose quotes outside are ignored
      if (depth > 0) inString = true;
    } else if (ch === '{') {
      if (depth === 0) start = i;
      depth += 1;
    } else if (ch === '}' && depth > 0) {
      depth -= 1;
      if (depth === 0) {
        spans.push(text.slice(start, i + 1));
        start = -1;
      }
    }
  }

  return spans;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Ask the LLM to turn `text` into JSON matching `schema`.
 *
 * The response is parsed with the optional llm-utils `extractJSON` helper; if
 * that step throws (for example because the package cannot be imported), the
 * first-`{`-to-last-`}` span of the response is parsed instead. Any failure is
 * logged as a warning and reported as null.
 *
 * @returns {Promise<object | null>} Parsed data that satisfies the schema, or null
 */
async function extractWithLLM(text, schema, config) {
  const prompt = `Extract structured data from the following text. Return ONLY valid JSON matching this schema:

Schema:
${JSON.stringify(schema, null, 2)}

Text to extract from:
${text}

Return ONLY the JSON object, no other text.`;

  // Parse the raw LLM response, preferring the shared helper when it is usable
  const parseResponse = async (response) => {
    try {
      const utils = await import('@arclabs561/llm-utils');
      return utils.extractJSON(response);
    } catch {
      // Fallback: try to parse as JSON directly
      const braceSpan = response.match(/\{[\s\S]*\}/);
      if (!braceSpan) {
        throw new Error('Could not extract JSON from response');
      }
      return JSON.parse(braceSpan[0]);
    }
  };

  try {
    const response = await callLLMForExtraction(prompt, config);
    const candidate = await parseResponse(response);
    if (candidate && validateSchema(candidate, schema)) {
      return candidate;
    }
    return null;
  } catch (error) {
    warn(`[DataExtractor] LLM extraction error: ${error.message}`);
    return null;
  }
}
|
|
126
|
+
|
|
127
|
+
/**
 * Call LLM API (text-only, no vision)
 *
 * Uses the optional @arclabs561/llm-utils package, requesting the 'advanced'
 * tier with a low temperature. The provider defaults to 'gemini' when the
 * config does not name one.
 *
 * Only a failure to load the package is reported as a missing-dependency
 * error; errors raised by the LLM call itself propagate unchanged so their
 * real cause is not masked.
 *
 * @param {string} prompt - Prompt to send
 * @param {{ apiKey?: string; provider?: string }} config - Provider configuration
 * @returns {Promise<unknown>} Whatever the llm-utils `callLLM` function resolves to
 * @throws {Error} When the package cannot be loaded or the LLM call fails
 */
async function callLLMForExtraction(prompt, config) {
  const apiKey = config.apiKey;
  const provider = config.provider || 'gemini';

  // Try to use optional llm-utils library if available
  let llmUtils;
  try {
    llmUtils = await import('@arclabs561/llm-utils');
  } catch (error) {
    throw new Error(
      `LLM extraction requires @arclabs561/llm-utils package: ${error.message}`,
      { cause: error }
    );
  }

  if (typeof llmUtils.callLLM !== 'function') {
    throw new Error('LLM extraction requires @arclabs561/llm-utils package: callLLM export not found');
  }

  // Use advanced tier for data extraction (needs higher quality)
  return await llmUtils.callLLM(prompt, provider, apiKey, {
    tier: 'advanced', // Data extraction benefits from better models
    temperature: 0.1,
    maxTokens: 1000
  });
}
|
|
150
|
+
|
|
151
|
+
/**
 * Backslash-escape every regex metacharacter in `str` so the result matches
 * the original text literally when embedded in a RegExp pattern.
 */
function escapeRegex(str) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  // '$&' re-inserts the matched character after the added backslash
  return str.replace(metaCharacters, '\\$&');
}
|
|
157
|
+
|
|
158
|
+
/**
 * Extract structured data using regex patterns
 *
 * For each schema field, looks (case-insensitively) for the field name followed
 * by whitespace, ':' or '=' and then a value of the declared type:
 *   - number:  optional minus sign, digits with optional fraction (or a leading-dot fraction)
 *   - string:  everything up to the next comma or newline, trimmed
 *   - boolean: true/false/yes/no
 * Fields with other types are never matched.
 *
 * @param {string} text - Text to search
 * @param {Record<string, { type: string; required?: boolean }>} schema - Fields to look for
 * @returns {object | null} Object with the fields that were found; null when a
 *   required field is missing or nothing was found at all
 */
function extractWithRegex(text, schema) {
  const result = {};

  for (const [key, field] of Object.entries(schema)) {
    const { type, required = false } = field;

    // Escape key so it is matched literally rather than as pattern syntax
    const escapedKey = escapeRegex(key);

    // Try to find value in text
    let value = null;

    if (type === 'number') {
      // Require at least one digit so dot-only runs such as "..." cannot yield NaN
      const match = text.match(
        new RegExp(`${escapedKey}[\\s:=]+(-?(?:\\d+(?:\\.\\d+)?|\\.\\d+))`, 'i')
      );
      if (match) {
        const parsed = parseFloat(match[1]);
        if (Number.isFinite(parsed)) {
          value = parsed;
        }
      }
    } else if (type === 'string') {
      // Look for string patterns
      const match = text.match(new RegExp(`${escapedKey}[\\s:=]+([^\\n,]+)`, 'i'));
      if (match) {
        value = match[1].trim();
      }
    } else if (type === 'boolean') {
      // Look for boolean patterns
      const match = text.match(new RegExp(`${escapedKey}[\\s:=]+(true|false|yes|no)`, 'i'));
      if (match) {
        const word = match[1].toLowerCase();
        value = word === 'true' || word === 'yes';
      }
    }

    if (value !== null) {
      result[key] = value;
    } else if (required) {
      // Required field not found
      return null;
    }
  }

  return Object.keys(result).length > 0 ? result : null;
}
|
|
203
|
+
|
|
204
|
+
/**
 * Validate extracted data against schema
 *
 * A field counts as present only when it is an own property of `data`, so
 * schema keys that collide with inherited members (e.g. `toString`) are not
 * mistaken for extracted values. Present fields declared as 'number',
 * 'string' or 'boolean' must have that exact `typeof`; other declared types
 * are not checked. A missing field fails validation only when it is required.
 *
 * @param {unknown} data - Candidate data
 * @param {Record<string, { type: string; required?: boolean }>} schema - Schema definition
 * @returns {boolean} True when data is an object that satisfies the schema
 */
function validateSchema(data, schema) {
  if (!data || typeof data !== 'object') return false;

  const primitiveTypes = ['number', 'string', 'boolean'];

  for (const [key, field] of Object.entries(schema)) {
    // A null/undefined field spec imposes no constraints
    const { type, required = false } = field ?? {};

    if (!Object.prototype.hasOwnProperty.call(data, key)) {
      if (required) {
        return false;
      }
      continue;
    }

    if (primitiveTypes.includes(type) && typeof data[key] !== type) {
      return false;
    }
  }

  return true;
}
|
|
232
|
+
|