@arclabs561/ai-visual-test 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.secretsignore.example +20 -0
- package/CHANGELOG.md +360 -0
- package/CONTRIBUTING.md +63 -0
- package/DEPLOYMENT.md +80 -0
- package/LICENSE +22 -0
- package/README.md +142 -0
- package/SECURITY.md +108 -0
- package/api/health.js +34 -0
- package/api/validate.js +252 -0
- package/index.d.ts +1221 -0
- package/package.json +112 -0
- package/public/index.html +149 -0
- package/src/batch-optimizer.mjs +451 -0
- package/src/bias-detector.mjs +370 -0
- package/src/bias-mitigation.mjs +233 -0
- package/src/cache.mjs +433 -0
- package/src/config.mjs +268 -0
- package/src/constants.mjs +80 -0
- package/src/context-compressor.mjs +350 -0
- package/src/convenience.mjs +617 -0
- package/src/cost-tracker.mjs +257 -0
- package/src/cross-modal-consistency.mjs +170 -0
- package/src/data-extractor.mjs +232 -0
- package/src/dynamic-few-shot.mjs +140 -0
- package/src/dynamic-prompts.mjs +361 -0
- package/src/ensemble/index.mjs +53 -0
- package/src/ensemble-judge.mjs +366 -0
- package/src/error-handler.mjs +67 -0
- package/src/errors.mjs +167 -0
- package/src/experience-propagation.mjs +128 -0
- package/src/experience-tracer.mjs +487 -0
- package/src/explanation-manager.mjs +299 -0
- package/src/feedback-aggregator.mjs +248 -0
- package/src/game-goal-prompts.mjs +478 -0
- package/src/game-player.mjs +548 -0
- package/src/hallucination-detector.mjs +155 -0
- package/src/helpers/playwright.mjs +80 -0
- package/src/human-validation-manager.mjs +516 -0
- package/src/index.mjs +364 -0
- package/src/judge.mjs +929 -0
- package/src/latency-aware-batch-optimizer.mjs +192 -0
- package/src/load-env.mjs +159 -0
- package/src/logger.mjs +55 -0
- package/src/metrics.mjs +187 -0
- package/src/model-tier-selector.mjs +221 -0
- package/src/multi-modal/index.mjs +36 -0
- package/src/multi-modal-fusion.mjs +190 -0
- package/src/multi-modal.mjs +524 -0
- package/src/natural-language-specs.mjs +1071 -0
- package/src/pair-comparison.mjs +277 -0
- package/src/persona/index.mjs +42 -0
- package/src/persona-enhanced.mjs +200 -0
- package/src/persona-experience.mjs +572 -0
- package/src/position-counterbalance.mjs +140 -0
- package/src/prompt-composer.mjs +375 -0
- package/src/render-change-detector.mjs +583 -0
- package/src/research-enhanced-validation.mjs +436 -0
- package/src/retry.mjs +152 -0
- package/src/rubrics.mjs +231 -0
- package/src/score-tracker.mjs +277 -0
- package/src/smart-validator.mjs +447 -0
- package/src/spec-config.mjs +106 -0
- package/src/spec-templates.mjs +347 -0
- package/src/specs/index.mjs +38 -0
- package/src/temporal/index.mjs +102 -0
- package/src/temporal-adaptive.mjs +163 -0
- package/src/temporal-batch-optimizer.mjs +222 -0
- package/src/temporal-constants.mjs +69 -0
- package/src/temporal-context.mjs +49 -0
- package/src/temporal-decision-manager.mjs +271 -0
- package/src/temporal-decision.mjs +669 -0
- package/src/temporal-errors.mjs +58 -0
- package/src/temporal-note-pruner.mjs +173 -0
- package/src/temporal-preprocessor.mjs +543 -0
- package/src/temporal-prompt-formatter.mjs +219 -0
- package/src/temporal-validation.mjs +159 -0
- package/src/temporal.mjs +415 -0
- package/src/type-guards.mjs +311 -0
- package/src/uncertainty-reducer.mjs +470 -0
- package/src/utils/index.mjs +175 -0
- package/src/validation-framework.mjs +321 -0
- package/src/validation-result-normalizer.mjs +64 -0
- package/src/validation.mjs +243 -0
- package/src/validators/accessibility-programmatic.mjs +345 -0
- package/src/validators/accessibility-validator.mjs +223 -0
- package/src/validators/batch-validator.mjs +143 -0
- package/src/validators/hybrid-validator.mjs +268 -0
- package/src/validators/index.mjs +34 -0
- package/src/validators/prompt-builder.mjs +218 -0
- package/src/validators/rubric.mjs +85 -0
- package/src/validators/state-programmatic.mjs +260 -0
- package/src/validators/state-validator.mjs +291 -0
- package/vercel.json +27 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Experience Propagation Tracker
|
|
3
|
+
*
|
|
4
|
+
* Tracks and logs how experience (screenshots, HTML/CSS, state) propagates through the system.
|
|
5
|
+
* Provides visibility into propagation paths and validates that context is preserved.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { log, warn } from './logger.mjs';
|
|
9
|
+
|
|
10
|
+
/**
 * Records, stage by stage, which parts of an experience context are present
 * (rendered code, HTML, critical CSS, DOM structure, screenshot, state) and
 * warns when rendered code, HTML or CSS that was present at the previous
 * stage is missing at the current one.
 */
export class ExperiencePropagationTracker {
  /**
   * @param {Object} [options] - Tracker options
   * @param {boolean} [options.enabled] - Tracking is on unless this is exactly `false`
   * @param {string} [options.logLevel] - 'debug', 'info' or 'warn'; falsy values fall back to 'info'
   */
  constructor(options = {}) {
    const { enabled, logLevel } = options;
    this.enabled = enabled !== false;
    this.logLevel = logLevel || 'info';
    this.propagationPath = [];
  }

  /**
   * Record what the given context carries at a stage.
   *
   * @param {string} stage - Stage name (e.g., 'capture', 'notes', 'trace', 'aggregation', 'context', 'evaluation')
   * @param {Object} context - Context object (may be null/undefined; everything is then reported as absent)
   * @param {Object} [context.renderedCode] - Rendered code; `html`, `criticalCSS` and `domStructure` are inspected
   * @param {string} [context.screenshot] - Screenshot path
   * @param {Object} [context.state] - State object (`pageState` and `gameState` also count as state)
   * @param {string} [description] - Optional description
   * @returns {Object|undefined} The recorded entry, or undefined when tracking is disabled
   */
  track(stage, context, description = '') {
    if (!this.enabled) {
      return;
    }

    const code = context?.renderedCode;
    const entry = {
      stage,
      timestamp: Date.now(),
      hasRenderedCode: Boolean(code),
      hasHTML: Boolean(code?.html),
      hasCSS: Boolean(code?.criticalCSS),
      hasDOM: Boolean(code?.domStructure),
      htmlLength: code?.html?.length || 0,
      hasScreenshot: Boolean(context?.screenshot),
      hasState: Boolean(context?.state || context?.pageState || context?.gameState),
      description
    };

    // Remember how many entries existed before this one so the previous
    // stage can be looked up after the push.
    const priorCount = this.propagationPath.length;
    this.propagationPath.push(entry);

    // The per-stage report is emitted for 'debug' and 'info' only.
    if (['debug', 'info'].includes(this.logLevel)) {
      const mark = (present) => (present ? '✓' : '✗');
      log(`[Experience Propagation] ${stage}:`, {
        renderedCode: mark(entry.hasRenderedCode),
        html: entry.hasHTML ? `✓ (${entry.htmlLength} chars)` : '✗',
        css: mark(entry.hasCSS),
        dom: mark(entry.hasDOM),
        screenshot: mark(entry.hasScreenshot),
        state: mark(entry.hasState),
        description
      });
    }

    // Compare against the immediately preceding stage and warn about anything lost.
    if (priorCount > 0) {
      const previous = this.propagationPath[priorCount - 1];
      const lossChecks = [
        ['hasRenderedCode', 'RenderedCode'],
        ['hasHTML', 'HTML'],
        ['hasCSS', 'CSS']
      ];
      for (const [flag, label] of lossChecks) {
        if (previous[flag] && !entry[flag]) {
          warn(`[Experience Propagation] WARNING: ${label} lost at stage '${stage}'`);
        }
      }
    }

    return entry;
  }

  /**
   * Summarize the recorded path.
   *
   * `path` is the tracker's own array, not a copy. The `*AtAllStages` flags
   * are true when no stage has been recorded yet.
   *
   * @returns {Object} Summary with `path`, `stages`, the three `*AtAllStages` flags and `htmlLengthProgression`
   */
  getSummary() {
    const path = this.propagationPath;
    const presentEverywhere = (flag) => path.every((entry) => entry[flag]);

    return {
      path,
      stages: path.map(({ stage }) => stage),
      hasRenderedCodeAtAllStages: presentEverywhere('hasRenderedCode'),
      hasHTMLAtAllStages: presentEverywhere('hasHTML'),
      hasCSSAtAllStages: presentEverywhere('hasCSS'),
      htmlLengthProgression: path.map(({ htmlLength }) => htmlLength)
    };
  }

  /**
   * Discard all recorded stages by installing a fresh, empty path array.
   */
  reset() {
    this.propagationPath = [];
  }
}
|
|
107
|
+
|
|
108
|
+
// Shared tracker instance; stays null until getPropagationTracker() is first called.
let globalTracker = null;

/**
 * Return the shared propagation tracker, creating it on first use.
 *
 * `options` is only consulted when the tracker is created by the first call;
 * later calls return the existing instance and ignore `options`.
 *
 * @param {Object} [options] - Options forwarded to the ExperiencePropagationTracker constructor
 * @returns {ExperiencePropagationTracker} The shared tracker
 */
export function getPropagationTracker(options = {}) {
  if (globalTracker) {
    return globalTracker;
  }

  globalTracker = new ExperiencePropagationTracker(options);
  return globalTracker;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Convenience wrapper: record a stage on the shared tracker returned by
 * getPropagationTracker().
 *
 * @param {string} stage - Stage name
 * @param {Object} context - Context object to inspect
 * @param {string} [description] - Optional description
 * @returns {Object|undefined} Whatever the shared tracker's `track` call returns
 */
export function trackPropagation(stage, context, description = '') {
  return getPropagationTracker().track(stage, context, description);
}
|
|
128
|
+
|
|
@@ -0,0 +1,487 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Experience Tracer
|
|
3
|
+
*
|
|
4
|
+
* Tracks and traces user experiences over time, integrating:
|
|
5
|
+
* - Persona-based experience notes
|
|
6
|
+
* - VLLM validation results
|
|
7
|
+
* - Temporal aggregation
|
|
8
|
+
* - State changes and interactions
|
|
9
|
+
*
|
|
10
|
+
* Provides full traceability for debugging and meta-evaluation.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { warn } from './logger.mjs';
|
|
14
|
+
|
|
15
|
+
/**
 * Experience Trace
 *
 * In-memory record of one session: generic events, validation results,
 * screenshots and state snapshots, each stamped with the wall-clock time and
 * the time elapsed since the trace was created.
 *
 * @class ExperienceTrace
 */
export class ExperienceTrace {
  /**
   * @param {string} sessionId - Unique session identifier (used as the prefix of every generated id)
   * @param {import('./index.mjs').Persona | null} [persona=null] - Persona configuration
   */
  constructor(sessionId, persona = null) {
    this.sessionId = sessionId;
    this.persona = persona;
    this.startTime = Date.now();
    this.events = [];
    this.validations = [];
    this.screenshots = [];
    this.stateHistory = [];
    this.aggregatedNotes = null;
    this.metaEvaluation = null;
  }

  /**
   * Add an experience event
   *
   * @param {string} type - Event type ('interaction', 'state-change', 'validation', 'screenshot')
   * @param {Record<string, unknown>} data - Event data
   * @param {number | null} [timestamp=null] - Event timestamp (defaults to now when null/undefined)
   * @returns {{ id: string; type: string; timestamp: number; elapsed: number; data: Record<string, unknown> }} Created event
   */
  addEvent(type, data, timestamp = null) {
    // Resolve the timestamp once so `timestamp` and `elapsed` always agree,
    // and use `??` so an explicit timestamp of 0 is honored rather than replaced.
    const resolvedTimestamp = timestamp ?? Date.now();
    const event = {
      id: `${this.sessionId}-${this.events.length}`,
      type,
      timestamp: resolvedTimestamp,
      elapsed: resolvedTimestamp - this.startTime,
      data
    };

    this.events.push(event);
    return event;
  }

  /**
   * Add VLLM validation result
   *
   * Stores the validation and mirrors it as a 'validation' event that
   * carries the validation id and context.
   *
   * @param {import('./index.mjs').ValidationResult} validation - Validation result from validateScreenshot
   * @param {Record<string, unknown>} [context={}] - Validation context (stage, interaction, etc.)
   * @returns {{ id: string; timestamp: number; elapsed: number; validation: import('./index.mjs').ValidationResult; context: Record<string, unknown>; screenshot: string | null }} Validation event
   */
  addValidation(validation, context = {}) {
    const now = Date.now();
    const validationEvent = {
      id: `${this.sessionId}-validation-${this.validations.length}`,
      timestamp: now,
      elapsed: now - this.startTime,
      validation,
      context,
      screenshot: validation?.screenshotPath || null
    };

    this.validations.push(validationEvent);
    // Reuse the same instant so the mirrored event lines up with the validation record.
    this.addEvent('validation', { validationId: validationEvent.id, context }, now);

    return validationEvent;
  }

  /**
   * Add screenshot capture
   *
   * Stores the screenshot and mirrors it as a 'screenshot' event.
   *
   * @param {string} path - Screenshot path
   * @param {string} step - Step name
   * @param {Record<string, unknown>} [metadata={}] - Additional metadata; cannot override id, path, step, timestamp or elapsed
   * @returns {{ id: string; path: string; step: string; timestamp: number; elapsed: number; [key: string]: unknown }} Screenshot event
   */
  addScreenshot(path, step, metadata = {}) {
    const now = Date.now();
    const screenshot = {
      // Metadata is spread first so the generated fields below always win;
      // otherwise a stray `id` in metadata would break the screenshotId
      // reference stored on the mirrored event.
      ...metadata,
      id: `${this.sessionId}-screenshot-${this.screenshots.length}`,
      path,
      step,
      timestamp: now,
      elapsed: now - this.startTime
    };

    this.screenshots.push(screenshot);
    this.addEvent('screenshot', { screenshotId: screenshot.id, step }, now);

    return screenshot;
  }

  /**
   * Add state snapshot
   *
   * Stores the snapshot and mirrors it as a 'state-change' event.
   *
   * @param {Record<string, unknown>} state - State object (pageState, gameState, etc.)
   * @param {string} [label=''] - State label
   * @returns {{ id: string; timestamp: number; elapsed: number; state: Record<string, unknown>; label: string }} Created snapshot
   */
  addStateSnapshot(state, label = '') {
    const now = Date.now();
    const snapshot = {
      id: `${this.sessionId}-state-${this.stateHistory.length}`,
      timestamp: now,
      elapsed: now - this.startTime,
      state,
      label
    };

    this.stateHistory.push(snapshot);
    this.addEvent('state-change', { stateId: snapshot.id, label }, now);

    return snapshot;
  }

  /**
   * Aggregate experience notes temporally
   *
   * Builds one note per 'interaction'/'observation' event and one per
   * validation, sorts them by timestamp, passes them to the supplied
   * aggregation function and caches the result on `aggregatedNotes`.
   *
   * @param {(notes: import('./index.mjs').TemporalNote[], options?: Record<string, unknown>) => import('./index.mjs').AggregatedTemporalNotes} aggregateTemporalNotes - Temporal aggregation function
   * @param {Record<string, unknown>} [options={}] - Aggregation options
   * @returns {import('./index.mjs').AggregatedTemporalNotes} Aggregated notes
   * @throws {TypeError} If `aggregateTemporalNotes` is not a function
   */
  aggregateNotes(aggregateTemporalNotes, options = {}) {
    if (typeof aggregateTemporalNotes !== 'function') {
      throw new TypeError('aggregateTemporalNotes must be a function');
    }

    // Extract notes from events and validations
    const eventNotes = this.events
      .filter(e => e.type === 'interaction' || e.type === 'observation')
      .map(e => ({
        // `?.` because addEvent accepts events without a data payload.
        step: e.data?.step || e.type,
        timestamp: e.timestamp,
        elapsed: e.elapsed,
        observation: e.data?.observation || e.data?.reasoning || '',
        // `??` keeps a legitimate score of 0; only a missing score becomes null.
        score: e.data?.score ?? null,
        gameState: e.data?.gameState || null
      }));

    const validationNotes = this.validations.map(v => ({
      step: `validation_${v.context?.stage || 'unknown'}`,
      timestamp: v.timestamp,
      elapsed: v.elapsed,
      observation: v.validation?.reasoning || v.validation?.issues?.join(' ') || '',
      score: v.validation?.score ?? null,
      gameState: v.context?.gameState || null
    }));

    const notes = [...eventNotes, ...validationNotes].sort((a, b) => a.timestamp - b.timestamp);

    this.aggregatedNotes = aggregateTemporalNotes(notes, options);
    return this.aggregatedNotes;
  }

  /**
   * Get trace summary
   *
   * @returns {{
   *   sessionId: string;
   *   persona: import('./index.mjs').Persona | null;
   *   duration: number;
   *   eventCount: number;
   *   validationCount: number;
   *   screenshotCount: number;
   *   stateSnapshotCount: number;
   *   hasAggregatedNotes: boolean;
   *   hasMetaEvaluation: boolean;
   * }} Trace summary (duration is measured up to the moment of the call)
   */
  getSummary() {
    return {
      sessionId: this.sessionId,
      persona: this.persona,
      duration: Date.now() - this.startTime,
      eventCount: this.events.length,
      validationCount: this.validations.length,
      screenshotCount: this.screenshots.length,
      stateSnapshotCount: this.stateHistory.length,
      hasAggregatedNotes: !!this.aggregatedNotes,
      hasMetaEvaluation: !!this.metaEvaluation
    };
  }

  /**
   * Get full trace for debugging
   *
   * The returned arrays are the trace's own arrays, not copies.
   *
   * @returns {{
   *   sessionId: string;
   *   persona: import('./index.mjs').Persona | null;
   *   startTime: number;
   *   duration: number;
   *   events: Array<Record<string, unknown>>;
   *   validations: Array<Record<string, unknown>>;
   *   screenshots: Array<Record<string, unknown>>;
   *   stateHistory: Array<Record<string, unknown>>;
   *   aggregatedNotes: import('./index.mjs').AggregatedTemporalNotes | null;
   *   metaEvaluation: Record<string, unknown> | null;
   *   summary: Record<string, unknown>;
   * }} Complete trace
   */
  getFullTrace() {
    return {
      sessionId: this.sessionId,
      persona: this.persona,
      startTime: this.startTime,
      duration: Date.now() - this.startTime,
      events: this.events,
      validations: this.validations,
      screenshots: this.screenshots,
      stateHistory: this.stateHistory,
      aggregatedNotes: this.aggregatedNotes,
      metaEvaluation: this.metaEvaluation,
      summary: this.getSummary()
    };
  }

  /**
   * Export trace to JSON
   *
   * Writes the full trace as 2-space-indented JSON.
   *
   * @param {string} filePath - Path to save trace
   * @returns {Promise<void>}
   */
  async exportToJSON(filePath) {
    // Loaded lazily so the module itself does not import fs.
    const fs = await import('fs/promises');
    await fs.writeFile(filePath, JSON.stringify(this.getFullTrace(), null, 2));
  }
}
|
|
237
|
+
|
|
238
|
+
/**
 * Experience Tracer Manager
 *
 * Keeps traces keyed by session id and scores their quality
 * (completeness, consistency, coverage, temporal coherence).
 *
 * @class ExperienceTracerManager
 */
export class ExperienceTracerManager {
  constructor() {
    this.traces = new Map();
    this.metaEvaluations = [];
  }

  /**
   * Create a new trace
   *
   * A trace already registered under the same session id is replaced.
   *
   * @param {string} sessionId - Unique session ID
   * @param {import('./index.mjs').Persona | null} [persona=null] - Persona configuration
   * @returns {ExperienceTrace} New trace
   */
  createTrace(sessionId, persona = null) {
    const trace = new ExperienceTrace(sessionId, persona);
    this.traces.set(sessionId, trace);
    return trace;
  }

  /**
   * Get trace by session ID
   *
   * @param {string} sessionId - Session ID
   * @returns {ExperienceTrace | null} Trace or null if not found
   */
  getTrace(sessionId) {
    return this.traces.get(sessionId) || null;
  }

  /**
   * Get all traces
   *
   * @returns {ExperienceTrace[]} Array of all traces
   */
  getAllTraces() {
    return Array.from(this.traces.values());
  }

  /**
   * Meta-evaluate trace quality
   *
   * Evaluates the quality of the trace itself:
   * - Completeness (are all events captured?)
   * - Consistency (do validations align with events?)
   * - Coverage (are all stages covered?)
   * - Temporal coherence (do notes make sense over time?)
   *
   * `quality` is the plain average of the four sub-scores (each 0-10 for
   * in-range inputs). When the trace has screenshots, the last one is also
   * sent to `validateScreenshot`; a failure there is logged and does not
   * fail the evaluation. The result is stored on the trace and appended to
   * `metaEvaluations`.
   *
   * @param {string} sessionId - Session ID
   * @param {(path: string, prompt: string, context: import('./index.mjs').ValidationContext) => Promise<import('./index.mjs').ValidationResult>} validateScreenshot - VLLM validation function
   * @returns {Promise<Record<string, unknown>>} Meta-evaluation result
   * @throws {Error} If no trace is registered for `sessionId`
   */
  async metaEvaluateTrace(sessionId, validateScreenshot) {
    const trace = this.getTrace(sessionId);
    if (!trace) {
      throw new Error(`Trace not found: ${sessionId}`);
    }

    const evaluation = {
      sessionId,
      timestamp: Date.now(),
      completeness: this.evaluateCompleteness(trace),
      consistency: this.evaluateConsistency(trace),
      coverage: this.evaluateCoverage(trace),
      temporalCoherence: this.evaluateTemporalCoherence(trace),
      quality: null
    };

    // Calculate overall quality score
    evaluation.quality = (
      evaluation.completeness.score +
      evaluation.consistency.score +
      evaluation.coverage.score +
      evaluation.temporalCoherence.score
    ) / 4;

    // If we have screenshots, validate the trace visually
    if (trace.screenshots.length > 0) {
      const lastScreenshot = trace.screenshots[trace.screenshots.length - 1];
      try {
        const visualValidation = await validateScreenshot(
          lastScreenshot.path,
          `Meta-evaluate this experience trace. Check if:
- All stages are captured (initial, form, payment, gameplay)
- Screenshots show progression
- State changes are visible
- The experience is complete and traceable`,
          {
            testType: 'meta-evaluation',
            trace: trace.getSummary()
          }
        );
        evaluation.visualValidation = visualValidation;
      } catch (e) {
        // Best effort: visual validation is optional. Fall back to the thrown
        // value itself when it is not an Error (e.g. a rejected string).
        warn('Visual meta-evaluation failed:', e?.message ?? e);
      }
    }

    trace.metaEvaluation = evaluation;
    this.metaEvaluations.push(evaluation);

    return evaluation;
  }

  /**
   * Evaluate trace completeness
   *
   * Scores the fraction of required event types ('screenshot', 'validation',
   * 'state-change') that appear at least once in the trace.
   *
   * @param {{ events: Array<{ type: string }> }} trace - Trace to evaluate
   * @returns {{ score: number; missing: string[]; eventTypes: string[]; totalEvents: number }} Completeness result
   */
  evaluateCompleteness(trace) {
    const requiredEvents = ['screenshot', 'validation', 'state-change'];
    const eventTypes = new Set(trace.events.map(e => e.type));

    const missing = requiredEvents.filter(type => !eventTypes.has(type));
    const completeness = 1 - (missing.length / requiredEvents.length);

    return {
      score: completeness * 10,
      missing,
      eventTypes: Array.from(eventTypes),
      totalEvents: trace.events.length
    };
  }

  /**
   * Evaluate trace consistency
   *
   * Compares the number of stored validations with the number of
   * 'validation' events, and the number of state snapshots with the number
   * of 'state-change' events. Each ratio is capped at 1 and counts as 0 when
   * either side is empty.
   *
   * @param {{ events: Array<{ type: string }>; validations: unknown[]; stateHistory: unknown[] }} trace - Trace to evaluate
   * @returns {{ score: number; validationAlignment: number; stateConsistency: number; validationCount: number; stateSnapshotCount: number }} Consistency result
   */
  evaluateConsistency(trace) {
    // Check if validations align with events
    const validationEvents = trace.events.filter(e => e.type === 'validation');
    const validations = trace.validations;

    const alignment = validations.length > 0 && validationEvents.length > 0
      ? Math.min(validations.length / validationEvents.length, 1)
      : 0;

    // Check if state changes are consistent
    const stateChanges = trace.events.filter(e => e.type === 'state-change');
    const stateSnapshots = trace.stateHistory;

    const stateConsistency = stateChanges.length > 0 && stateSnapshots.length > 0
      ? Math.min(stateSnapshots.length / stateChanges.length, 1)
      : 0;

    const consistency = (alignment + stateConsistency) / 2;

    return {
      score: consistency * 10,
      validationAlignment: alignment,
      stateConsistency: stateConsistency,
      validationCount: validations.length,
      stateSnapshotCount: stateSnapshots.length
    };
  }

  /**
   * Evaluate trace coverage
   *
   * A stage counts as covered when its name appears (case-insensitively) as
   * a substring of some event's stage label. The label is taken from
   * `data.stage`, then `data.step`, then `data.context.stage`.
   *
   * @param {{ events: Array<{ data?: Record<string, unknown> | null }> }} trace - Trace to evaluate
   * @param {string[]} [expectedStages=['initial', 'form', 'payment', 'gameplay']] - Stage names to look for
   * @returns {{ score: number; expectedStages: string[]; coveredStages: string[]; coverage: number }} Coverage result
   */
  evaluateCoverage(trace, expectedStages = ['initial', 'form', 'payment', 'gameplay']) {
    const stageLabels = trace.events
      // `data.context.stage` is where validation events keep their stage;
      // without it validations could never contribute to coverage.
      .map(e => e.data?.stage || e.data?.step || e.data?.context?.stage || '')
      .filter(s => s)
      // Labels are caller-supplied and may not be strings (e.g. numeric steps).
      .map(s => String(s).toLowerCase());

    const coveredStages = expectedStages.filter(stage =>
      stageLabels.some(label => label.includes(String(stage).toLowerCase()))
    );

    // Guard the division so an empty stage list yields 0 rather than NaN.
    const coverage = expectedStages.length > 0
      ? coveredStages.length / expectedStages.length
      : 0;

    return {
      score: coverage * 10,
      expectedStages,
      coveredStages,
      coverage: coverage
    };
  }

  /**
   * Evaluate temporal coherence
   *
   * Reads `coherence`, `conflicts` and `windows` from the trace's aggregated
   * notes; scores 0 when the notes have not been aggregated yet.
   *
   * @param {{ aggregatedNotes: { coherence?: number; conflicts?: unknown[]; windows?: unknown[] } | null }} trace - Trace to evaluate
   * @returns {{ score: number; reason?: string; coherence?: number; conflicts?: number; windows?: number }} Coherence result
   */
  evaluateTemporalCoherence(trace) {
    if (!trace.aggregatedNotes) {
      return {
        score: 0,
        reason: 'No aggregated notes available'
      };
    }

    const coherence = trace.aggregatedNotes.coherence || 0;
    const conflicts = trace.aggregatedNotes.conflicts || [];

    return {
      score: coherence * 10,
      coherence: coherence,
      conflicts: conflicts.length,
      windows: trace.aggregatedNotes.windows?.length || 0
    };
  }

  /**
   * Get summary of all meta-evaluations
   *
   * @returns {{
   *   totalEvaluations: number;
   *   averageQuality: number | null;
   *   evaluations?: Array<Record<string, unknown>>;
   * }} Meta-evaluation summary (`evaluations` is omitted when none have been run)
   */
  getMetaEvaluationSummary() {
    if (this.metaEvaluations.length === 0) {
      return {
        totalEvaluations: 0,
        averageQuality: null
      };
    }

    const qualities = this.metaEvaluations
      .map(e => e.quality)
      .filter(q => q !== null);

    return {
      totalEvaluations: this.metaEvaluations.length,
      averageQuality: qualities.length > 0
        ? qualities.reduce((a, b) => a + b, 0) / qualities.length
        : null,
      evaluations: this.metaEvaluations
    };
  }
}
|
|
472
|
+
|
|
473
|
+
// Shared manager instance; stays null until getTracerManager() is first called.
let tracerManager = null;

/**
 * Return the shared tracer manager, constructing it on the first call.
 *
 * @returns {ExperienceTracerManager} Tracer manager instance
 */
export function getTracerManager() {
  tracerManager = tracerManager || new ExperienceTracerManager();
  return tracerManager;
}
|
|
487
|
+
|