@arclabs561/ai-visual-test 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.secretsignore.example +20 -0
- package/CHANGELOG.md +360 -0
- package/CONTRIBUTING.md +63 -0
- package/DEPLOYMENT.md +80 -0
- package/LICENSE +22 -0
- package/README.md +142 -0
- package/SECURITY.md +108 -0
- package/api/health.js +34 -0
- package/api/validate.js +252 -0
- package/index.d.ts +1221 -0
- package/package.json +112 -0
- package/public/index.html +149 -0
- package/src/batch-optimizer.mjs +451 -0
- package/src/bias-detector.mjs +370 -0
- package/src/bias-mitigation.mjs +233 -0
- package/src/cache.mjs +433 -0
- package/src/config.mjs +268 -0
- package/src/constants.mjs +80 -0
- package/src/context-compressor.mjs +350 -0
- package/src/convenience.mjs +617 -0
- package/src/cost-tracker.mjs +257 -0
- package/src/cross-modal-consistency.mjs +170 -0
- package/src/data-extractor.mjs +232 -0
- package/src/dynamic-few-shot.mjs +140 -0
- package/src/dynamic-prompts.mjs +361 -0
- package/src/ensemble/index.mjs +53 -0
- package/src/ensemble-judge.mjs +366 -0
- package/src/error-handler.mjs +67 -0
- package/src/errors.mjs +167 -0
- package/src/experience-propagation.mjs +128 -0
- package/src/experience-tracer.mjs +487 -0
- package/src/explanation-manager.mjs +299 -0
- package/src/feedback-aggregator.mjs +248 -0
- package/src/game-goal-prompts.mjs +478 -0
- package/src/game-player.mjs +548 -0
- package/src/hallucination-detector.mjs +155 -0
- package/src/helpers/playwright.mjs +80 -0
- package/src/human-validation-manager.mjs +516 -0
- package/src/index.mjs +364 -0
- package/src/judge.mjs +929 -0
- package/src/latency-aware-batch-optimizer.mjs +192 -0
- package/src/load-env.mjs +159 -0
- package/src/logger.mjs +55 -0
- package/src/metrics.mjs +187 -0
- package/src/model-tier-selector.mjs +221 -0
- package/src/multi-modal/index.mjs +36 -0
- package/src/multi-modal-fusion.mjs +190 -0
- package/src/multi-modal.mjs +524 -0
- package/src/natural-language-specs.mjs +1071 -0
- package/src/pair-comparison.mjs +277 -0
- package/src/persona/index.mjs +42 -0
- package/src/persona-enhanced.mjs +200 -0
- package/src/persona-experience.mjs +572 -0
- package/src/position-counterbalance.mjs +140 -0
- package/src/prompt-composer.mjs +375 -0
- package/src/render-change-detector.mjs +583 -0
- package/src/research-enhanced-validation.mjs +436 -0
- package/src/retry.mjs +152 -0
- package/src/rubrics.mjs +231 -0
- package/src/score-tracker.mjs +277 -0
- package/src/smart-validator.mjs +447 -0
- package/src/spec-config.mjs +106 -0
- package/src/spec-templates.mjs +347 -0
- package/src/specs/index.mjs +38 -0
- package/src/temporal/index.mjs +102 -0
- package/src/temporal-adaptive.mjs +163 -0
- package/src/temporal-batch-optimizer.mjs +222 -0
- package/src/temporal-constants.mjs +69 -0
- package/src/temporal-context.mjs +49 -0
- package/src/temporal-decision-manager.mjs +271 -0
- package/src/temporal-decision.mjs +669 -0
- package/src/temporal-errors.mjs +58 -0
- package/src/temporal-note-pruner.mjs +173 -0
- package/src/temporal-preprocessor.mjs +543 -0
- package/src/temporal-prompt-formatter.mjs +219 -0
- package/src/temporal-validation.mjs +159 -0
- package/src/temporal.mjs +415 -0
- package/src/type-guards.mjs +311 -0
- package/src/uncertainty-reducer.mjs +470 -0
- package/src/utils/index.mjs +175 -0
- package/src/validation-framework.mjs +321 -0
- package/src/validation-result-normalizer.mjs +64 -0
- package/src/validation.mjs +243 -0
- package/src/validators/accessibility-programmatic.mjs +345 -0
- package/src/validators/accessibility-validator.mjs +223 -0
- package/src/validators/batch-validator.mjs +143 -0
- package/src/validators/hybrid-validator.mjs +268 -0
- package/src/validators/index.mjs +34 -0
- package/src/validators/prompt-builder.mjs +218 -0
- package/src/validators/rubric.mjs +85 -0
- package/src/validators/state-programmatic.mjs +260 -0
- package/src/validators/state-validator.mjs +291 -0
- package/vercel.json +27 -0
|
@@ -0,0 +1,669 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Temporal Decision-Making
|
|
3
|
+
*
|
|
4
|
+
* Implements multi-scale temporal aggregation for LLM evaluations:
|
|
5
|
+
* - Multi-scale temporal aggregation (0.1s to 60s+)
|
|
6
|
+
* - Sequential decision context
|
|
7
|
+
* - Human perception time modeling
|
|
8
|
+
* - Attention-based weighting
|
|
9
|
+
*
|
|
10
|
+
* Research context:
|
|
11
|
+
* - Efficient Sequential Decision Making (arXiv:2406.12125) - Paper focuses on online
|
|
12
|
+
* model selection achieving 6x performance gain with 1.5% LLM call rate. Our implementation
|
|
13
|
+
* uses multi-scale temporal aggregation (inspired by temporal aspects) but does NOT
|
|
14
|
+
* implement the paper's core online model selection algorithm or decision logic for
|
|
15
|
+
* when to prompt. We cite this for temporal awareness concepts, not the core algorithm.
|
|
16
|
+
* - Human Time Perception (PMC research) - Human perception time scales
|
|
17
|
+
* - Powers of 10: Time Scales in UX (NN/g) - UX time scale research
|
|
18
|
+
*
|
|
19
|
+
* IMPORTANT: This module implements temporal aggregation and attention-based weighting,
|
|
20
|
+
* NOT the adaptive LLM calling strategy or decision logic from arXiv:2406.12125.
|
|
21
|
+
* The paper's core contribution (online model selection, when-to-prompt decisions) is
|
|
22
|
+
* NOT implemented here. We use temporal concepts inspired by the paper's temporal aspects.
|
|
23
|
+
*
|
|
24
|
+
* @module temporal-decision
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import {
|
|
28
|
+
TIME_SCALES,
|
|
29
|
+
MULTI_SCALE_WINDOWS,
|
|
30
|
+
READING_SPEEDS,
|
|
31
|
+
ATTENTION_MULTIPLIERS,
|
|
32
|
+
COMPLEXITY_MULTIPLIERS,
|
|
33
|
+
CONFIDENCE_THRESHOLDS,
|
|
34
|
+
TIME_BOUNDS,
|
|
35
|
+
CONTENT_THRESHOLDS
|
|
36
|
+
} from './temporal-constants.mjs';
|
|
37
|
+
import { validateAndSortNotes, validateTimeScales, validateAction, validatePerceptionContext, validateSequentialContextOptions } from './temporal-validation.mjs';
|
|
38
|
+
import { MultiScaleError, PerceptionTimeError } from './temporal-errors.mjs';
|
|
39
|
+
import { warn, log } from './logger.mjs';
|
|
40
|
+
|
|
41
|
+
/**
 * Multi-scale temporal aggregation.
 *
 * Buckets notes into fixed-size windows for every entry of `timeScales`
 * (scale name → window size) and computes, per window, an attention-weighted
 * average score. Each scale also receives a coherence value, and a
 * human-readable summary is built from all scales.
 *
 * Windows are kept in a Map keyed by window index rather than in a sparse
 * array, so a very large index neither forces a scan over empty slots nor
 * falls outside the range of valid array indices.
 *
 * @param {Array<Object>} notes - Temporal notes; passed through
 *   `validateAndSortNotes` before use.
 * @param {Object} [options]
 * @param {Object<string, number>} [options.timeScales=MULTI_SCALE_WINDOWS] -
 *   Scale name → window size (same unit as `note.elapsed` / `note.timestamp`).
 * @param {boolean} [options.attentionWeights=true] - When falsy, every note
 *   gets weight 1.0 instead of `calculateAttentionWeight(...)`.
 * @returns {{scales: Object, summary: string, coherence: Object}}
 */
export function aggregateMultiScale(notes, options = {}) {
  const sortedNotes = validateAndSortNotes(notes);

  const {
    timeScales = MULTI_SCALE_WINDOWS,
    attentionWeights = true
  } = options;

  validateTimeScales(timeScales);

  if (sortedNotes.length === 0) {
    return {
      scales: {},
      summary: 'No notes available',
      coherence: {}
    };
  }

  const startTime = sortedNotes[0].timestamp || Date.now();
  const scales = {};

  for (const [scaleName, windowSize] of Object.entries(timeScales)) {
    const windowsByIndex = new Map();

    for (const note of sortedNotes) {
      const elapsed = note.elapsed || (note.timestamp - startTime);
      const windowIndex = Math.floor(elapsed / windowSize);

      // A note without usable timing (NaN), with a negative offset, or with
      // an infinite index cannot be placed in any window. The sparse-array
      // version dropped such notes implicitly; skip them explicitly here.
      if (!Number.isFinite(windowIndex) || windowIndex < 0) {
        continue;
      }

      let bucket = windowsByIndex.get(windowIndex);
      if (!bucket) {
        bucket = {
          index: windowIndex,
          startTime: startTime + (windowIndex * windowSize),
          endTime: startTime + ((windowIndex + 1) * windowSize),
          notes: [],
          weightedScore: 0,
          totalWeight: 0
        };
        windowsByIndex.set(windowIndex, bucket);
      }

      // Attention-based weighting (optional)
      const weight = attentionWeights
        ? calculateAttentionWeight(note, { elapsed, windowSize, scaleName })
        : 1.0;

      bucket.notes.push({ ...note, weight });

      const score = note.gameState?.score || note.score || 0;
      bucket.weightedScore += score * weight;
      bucket.totalWeight += weight;
    }

    // Only windows that received at least one note exist; order them by index
    // so downstream trend/coherence calculations see them chronologically.
    const definedWindows = [...windowsByIndex.values()]
      .sort((a, b) => a.index - b.index);

    scales[scaleName] = {
      windowSize,
      windows: definedWindows.map(w => ({
        window: w.index,
        timeRange: `${Math.round((w.startTime - startTime) / 1000)}s-${Math.round((w.endTime - startTime) / 1000)}s`,
        avgScore: w.totalWeight > 0 ? w.weightedScore / w.totalWeight : 0,
        noteCount: w.notes.length
      })),
      coherence: calculateCoherenceForScale(definedWindows)
    };
  }

  return {
    scales,
    summary: generateMultiScaleSummary(scales),
    coherence: Object.fromEntries(
      Object.entries(scales).map(([name, scale]) => [name, scale.coherence])
    )
  };
}
|
|
124
|
+
|
|
125
|
+
/**
 * Calculate the attention-based weight of a note.
 *
 * The weight is the product of four factors:
 *  - recency: 0.9 raised to (elapsed / windowSize)
 *  - salience: result of `calculateSalience(note)`
 *  - action: 1.5 when `note.step` includes 'interaction' or 'click', else 1.0
 *  - novelty: 1.3 when `note.observation` includes 'change' or 'new', else 1.0
 *
 * @param {import('./index.mjs').TemporalNote} note - Temporal note
 * @param {Object} context - Context with elapsed, windowSize, scaleName
 * @returns {number} Attention weight
 */
export function calculateAttentionWeight(note, context) {
  const { elapsed, windowSize } = context;

  // Exponential decay relative to the window size
  const recencyFactor = 0.9 ** (elapsed / windowSize);

  // Important events draw more attention
  const salienceFactor = calculateSalience(note);

  // User actions focus attention
  const isUserAction =
    note.step?.includes('interaction') || note.step?.includes('click');
  const actionFactor = isUserAction ? 1.5 : 1.0;

  // Context changes attract attention
  const isNovel =
    note.observation?.includes('change') || note.observation?.includes('new');
  const noveltyFactor = isNovel ? 1.3 : 1.0;

  return [recencyFactor, salienceFactor, actionFactor, noveltyFactor]
    .reduce((product, factor) => product * factor, 1);
}
|
|
154
|
+
|
|
155
|
+
/**
 * Calculate salience (importance) of a note.
 *
 * Starts at 1.0 and multiplies in:
 *  - 1.5 when the score is extreme (>= 8 or <= 2)
 *  - 1.2 when the note lists at least one issue
 *  - 1.3 when the observation mentions a critical keyword (case-insensitive)
 *
 * The score is taken from `note.score`, then `note.gameState.score`, and
 * defaults to 5 only when both are null/undefined. A genuine score of 0 is
 * therefore kept and counts as extreme.
 *
 * @param {Object} note - Temporal note
 * @returns {number} Salience multiplier (>= 1.0)
 */
function calculateSalience(note) {
  let salience = 1.0;

  // High scores or low scores are more salient.
  // `??` (not `||`) so that a real score of 0 is not replaced by the fallback.
  const score = note.score ?? note.gameState?.score ?? 5;
  if (score >= 8 || score <= 2) {
    salience *= 1.5;
  }

  // Issues mentioned increase salience
  if (note.issues && note.issues.length > 0) {
    salience *= 1.2;
  }

  // Critical keywords increase salience; non-string observations are ignored
  // rather than crashing on `.toLowerCase()`.
  const criticalKeywords = ['error', 'broken', 'fail', 'critical', 'important'];
  const observation = typeof note.observation === 'string'
    ? note.observation.toLowerCase()
    : '';
  if (criticalKeywords.some(kw => observation.includes(kw))) {
    salience *= 1.3;
  }

  return salience;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Calculate coherence for a specific time scale.
 *
 * Blends three signals derived from the per-window average scores:
 *  - direction consistency (weight 0.4): share of consecutive trends that do
 *    not flip direction
 *  - stability (weight 0.3): direction flips relative to the maximum possible
 *  - variance coherence (weight 0.3): 1 minus variance over a normalizing
 *    maximum variance
 *
 * Returns 1.0 when fewer than two windows/scores are available, and 0.5 if
 * the blended value is not a finite number.
 *
 * @param {Array<{weightedScore: number, totalWeight: number}>} windows
 * @returns {number} Coherence in [0, 1]
 */
function calculateCoherenceForScale(windows) {
  if (windows.length < 2) return 1.0;

  const clampToUnit = (value) => Math.max(0, Math.min(1, value));

  // Per-window weighted average, discarding anything non-finite
  const scores = [];
  for (const win of windows) {
    const average = win.totalWeight > 0 ? win.weightedScore / win.totalWeight : 0;
    if (Number.isFinite(average)) {
      scores.push(average);
    }
  }

  // Need at least 2 scores to talk about direction
  if (scores.length < 2) return 1.0;

  // Walk consecutive pairs once: a non-negative change counts as "up" (1),
  // a negative one as "down" (-1); count how often the direction flips.
  const trendCount = scores.length - 1;
  let directionChanges = 0;
  let previousDirection = null;
  for (let i = 1; i < scores.length; i++) {
    const direction = scores[i] - scores[i - 1] >= 0 ? 1 : -1;
    if (previousDirection !== null && direction !== previousDirection) {
      directionChanges++;
    }
    previousDirection = direction;
  }
  const directionConsistency = clampToUnit(
    1.0 - (directionChanges / Math.max(1, trendCount))
  );

  // Population variance of the scores
  let total = 0;
  scores.forEach((s) => { total += s; });
  const meanScore = total / scores.length;

  let squaredDeviationSum = 0;
  scores.forEach((s) => { squaredDeviationSum += Math.pow(s - meanScore, 2); });
  const variance = squaredDeviationSum / scores.length;

  // Normalizer: the largest of (range/2)^2, (mean/2)^2 and a floor of 10
  const scoreRange = Math.max(...scores) - Math.min(...scores);
  const maxVariance = Math.max(
    Math.pow(scoreRange / 2, 2),
    Math.pow(meanScore * 0.5, 2),
    10
  );
  const varianceCoherence = clampToUnit(1.0 - (variance / maxVariance));

  // Stability: flips relative to the maximum number of flips possible
  const maxPossibleChanges = Math.max(1, scores.length - 2);
  const stability = clampToUnit(1.0 - (directionChanges / maxPossibleChanges));

  // Weights: direction 0.4, stability 0.3, variance 0.3
  const blended = directionConsistency * 0.4 + stability * 0.3 + varianceCoherence * 0.3;

  // Fall back to 0.5 for NaN/Infinity, then clamp to [0, 1]
  return clampToUnit(Number.isFinite(blended) ? blended : 0.5);
}
|
|
236
|
+
|
|
237
|
+
/**
 * Generate summary across multiple time scales.
 *
 * For each scale that has at least one window, emits
 * "<name> scale (<windowSize>ms): <first avg> → <last avg>, coherence: <pct>%"
 * and joins the fragments with "; ". Scales without windows, or whose first
 * or last window lacks `avgScore`, are left out.
 *
 * @param {Object<string, {windowSize: number, windows: Array, coherence?: number}>} scales
 * @returns {string} Summary text (empty string when nothing qualifies)
 */
function generateMultiScaleSummary(scales) {
  const describeScale = ([scaleName, scale]) => {
    if (!scale || !scale.windows || !(scale.windows.length > 0)) {
      return [];
    }

    const firstWindow = scale.windows[0];
    const lastWindow = scale.windows[scale.windows.length - 1];

    // Defensive: skip scales whose boundary windows carry no average score
    const hasScores = firstWindow && lastWindow &&
      firstWindow.avgScore !== undefined &&
      lastWindow.avgScore !== undefined;
    if (!hasScores) {
      return [];
    }

    const startScore = firstWindow.avgScore.toFixed(1);
    const endScore = lastWindow.avgScore.toFixed(1);
    const coherence = scale.coherence !== undefined ? scale.coherence : 0;
    const coherencePercent = (coherence * 100).toFixed(0);

    return [`${scaleName} scale (${scale.windowSize}ms): ${startScore} → ${endScore}, coherence: ${coherencePercent}%`];
  };

  return Object.entries(scales).flatMap(describeScale).join('; ');
}
|
|
262
|
+
|
|
263
|
+
/**
 * Sequential Decision Context
 *
 * Keeps a bounded history of evaluation decisions and, when adaptation is
 * enabled, appends a digest of that history (recent scores, trend, recurring
 * issues) plus adaptation instructions to subsequent prompts. Optionally
 * tracks score variance against a baseline and skips adaptation for a prompt
 * when variance has grown by more than 20%.
 */
export class SequentialDecisionContext {
  /**
   * @param {Object} [options]
   * @param {number} [options.maxHistory=10] - Maximum decisions retained
   * @param {boolean} [options.adaptationEnabled=false] - Must be exactly
   *   `true` to enable prompt adaptation
   * @param {boolean} [options.varianceTracking=true] - Disabled only when
   *   exactly `false`
   */
  constructor(options = {}) {
    validateSequentialContextOptions(options);

    this.history = [];
    this.currentState = null;
    this.adaptations = {};
    this.maxHistory = options.maxHistory || 10;
    // NOTE (original author): adaptation defaults to off because evaluation
    // data (data-driven-analysis-1762832349830.json) showed sequential context
    // raising variance (isolated 0.231 vs sequential 0.324). Callers opt in
    // explicitly, accepting the variance trade-off.
    this.adaptationEnabled = options.adaptationEnabled === true;
    this.varianceTracking = options.varianceTracking !== false; // on by default
    this.baselineVariance = null; // set once three numeric scores are available
  }

  /**
   * Add decision to history.
   *
   * The stored entry is a copy of `decision` stamped with the current time
   * and its insertion index; the oldest entry is evicted once `maxHistory`
   * is exceeded. `currentState` always points at the decision passed in.
   *
   * @param {Object} decision - Decision with optional `score` and `issues`
   */
  addDecision(decision) {
    this.history.push({
      ...decision,
      timestamp: Date.now(),
      index: this.history.length
    });

    // Keep only recent history
    if (this.history.length > this.maxHistory) {
      this.history.shift();
    }

    this.currentState = decision;

    // Establish the baseline variance once, from the first three (or more)
    // numeric scores, so later variance can be compared against it.
    if (this.varianceTracking && this.baselineVariance === null && this.history.length >= 3) {
      const scores = collectNumericScores(this.history);
      if (scores.length >= 3) {
        this.baselineVariance = calculateVariance(scores);
      }
    }
  }

  /**
   * Adapt prompt based on history.
   *
   * Returns `basePrompt` unchanged when adaptation is disabled, when there is
   * no history, or when tracked variance rose more than 20% over the baseline
   * (the event is logged via `warn` and recorded). Variance decreases of more
   * than 10% are logged via `log` and recorded too, but do not block adaptation.
   *
   * @param {string} basePrompt
   * @param {Object} [currentContext] - Forwarded to `buildAdaptationInstructions`
   * @returns {string} Possibly extended prompt
   */
  adaptPrompt(basePrompt, currentContext) {
    if (!this.adaptationEnabled || this.history.length === 0) {
      return basePrompt;
    }

    const patterns = this.identifyPatterns();

    if (this.varianceTracking) {
      // NOTE(review): a baseline of exactly 0 makes any later variance an
      // infinite relative increase, which disables adaptation; confirm that
      // this is the intended policy.
      const varianceChange = relativeVarianceChange(this.baselineVariance, patterns.scoreVariance);

      if (varianceChange !== null && varianceChange > 0.2) {
        // Always logged (not only in verbose mode) - treated as a critical metric
        warn(`[SequentialContext] Variance increased by ${(varianceChange * 100).toFixed(1)}% (${this.baselineVariance.toFixed(3)} → ${patterns.scoreVariance.toFixed(3)}). Disabling adaptation to prevent further degradation.`);
        pushVarianceEvent(this, {
          timestamp: Date.now(),
          baselineVariance: this.baselineVariance,
          currentVariance: patterns.scoreVariance,
          increasePercent: varianceChange * 100,
          historyLength: this.history.length
        });
        // Skip adaptation for this prompt only
        return basePrompt;
      }

      if (varianceChange !== null && varianceChange < -0.1) { // 10% decrease threshold
        log(`[SequentialContext] Variance decreased by ${Math.abs(varianceChange * 100).toFixed(1)}% (${this.baselineVariance.toFixed(3)} → ${patterns.scoreVariance.toFixed(3)}). Model stability improved.`);
        // Decreases share the same event list, marked with type 'decrease'
        pushVarianceEvent(this, {
          timestamp: Date.now(),
          baselineVariance: this.baselineVariance,
          currentVariance: patterns.scoreVariance,
          increasePercent: varianceChange * 100, // Negative for decreases
          historyLength: this.history.length,
          type: 'decrease'
        });
      }
    }

    const historyContext = this.buildHistoryContext(patterns);

    return `${basePrompt}

## Previous Evaluation Context:
${historyContext}

## Adaptation Instructions:
${this.buildAdaptationInstructions(patterns, currentContext)}`;
  }

  /**
   * Identify patterns in decision history.
   *
   * @returns {Object} `{}` when fewer than two decisions are stored; otherwise
   *   `{ trend, commonIssues, isConsistent, scoreVariance, recentScores }`
   *   where `trend` is 'improving' | 'declining' | 'stable' (comparing the two
   *   most recent numeric scores), `commonIssues` lists issues seen at least
   *   twice, and `isConsistent` means variance below 2.0.
   */
  identifyPatterns() {
    if (this.history.length < 2) return {};

    const scores = collectNumericScores(this.history);
    const issues = this.history.flatMap(d => d.issues || []);

    // Trend: equal scores are 'stable', not 'declining'
    let trend = 'stable';
    if (scores.length >= 2) {
      const latest = scores[scores.length - 1];
      const previous = scores[scores.length - 2];
      if (latest > previous) {
        trend = 'improving';
      } else if (latest < previous) {
        trend = 'declining';
      }
    }

    // Count issues in a Map (keyed by their string form) so that issue text
    // such as "constructor" cannot collide with Object.prototype members.
    const issueCounts = new Map();
    for (const issue of issues) {
      const key = String(issue);
      issueCounts.set(key, (issueCounts.get(key) || 0) + 1);
    }
    const commonIssues = [...issueCounts.entries()]
      .filter(([, count]) => count >= 2)
      .map(([issue]) => issue);

    // Consistency
    const scoreVariance = scores.length > 1 ? calculateVariance(scores) : 0;
    const isConsistent = scoreVariance < 2.0;

    return {
      trend,
      commonIssues,
      isConsistent,
      scoreVariance,
      recentScores: scores.slice(-3)
    };
  }

  /**
   * Build history context for prompt.
   *
   * Tolerates a partial or empty `patterns` object (as returned by
   * `identifyPatterns()` for a single-entry history).
   *
   * @param {Object} patterns - Result of `identifyPatterns()`
   * @returns {string} Newline-joined context lines
   */
  buildHistoryContext(patterns) {
    const { trend, commonIssues = [], isConsistent, scoreVariance } = patterns || {};
    const parts = [];

    if (this.history.length > 0) {
      const recent = this.history.slice(-3);
      parts.push(`Recent evaluations (${this.history.length} total):`);
      recent.forEach((d, i) => {
        const scoreText = typeof d.score === 'number' ? d.score.toFixed(1) : 'N/A';
        parts.push(` ${i + 1}. Score: ${scoreText}/10, Issues: ${(d.issues || []).length}`);
      });
    }

    if (trend) {
      parts.push(`Trend: ${trend}`);
    }

    if (commonIssues.length > 0) {
      parts.push(`Recurring issues: ${commonIssues.join(', ')}`);
    }

    // Only warn when inconsistency was actually measured
    if (isConsistent === false) {
      const varianceText = typeof scoreVariance === 'number' ? scoreVariance.toFixed(2) : 'unknown';
      parts.push(`Warning: Inconsistent scores detected (variance: ${varianceText})`);
    }

    return parts.join('\n');
  }

  /**
   * Build adaptation instructions.
   *
   * Chooses the wording strength from a confidence level: 'high' when
   * variance is below CONFIDENCE_THRESHOLDS.HIGH_VARIANCE and recurring
   * issues exist, 'medium' when variance is below
   * CONFIDENCE_THRESHOLDS.MEDIUM_VARIANCE or recurring issues exist,
   * otherwise 'low'. Tolerates a partial or empty `patterns` object.
   *
   * @param {Object} patterns - Result of `identifyPatterns()`
   * @param {Object} [currentContext] - Currently unused
   * @returns {string} Newline-joined instructions
   */
  buildAdaptationInstructions(patterns, currentContext) {
    const {
      trend,
      commonIssues = [],
      isConsistent,
      scoreVariance,
      recentScores = []
    } = patterns || {};
    const instructions = [];

    const variance = scoreVariance || 0;
    const hasStrongPatterns = commonIssues.length > 0;
    let confidence = 'low';
    if (variance < CONFIDENCE_THRESHOLDS.HIGH_VARIANCE && hasStrongPatterns) {
      confidence = 'high';
    } else if (variance < CONFIDENCE_THRESHOLDS.MEDIUM_VARIANCE || hasStrongPatterns) {
      confidence = 'medium';
    }

    // Strong wording only at high confidence, to avoid over-correction
    if (trend === 'declining' && confidence === 'high') {
      instructions.push('Previous evaluations showed declining quality. Pay special attention to issues.');
    } else if (trend === 'declining' && confidence === 'medium') {
      instructions.push('Previous evaluations showed a slight decline. Consider checking for issues.');
    }

    if (hasStrongPatterns) {
      if (confidence === 'high') {
        instructions.push(`Look for these recurring issues: ${commonIssues.join(', ')}`);
      } else if (confidence === 'medium') {
        instructions.push(`These issues appeared in previous evaluations: ${commonIssues.join(', ')}. Consider checking for them.`);
      }
    }

    if (isConsistent === false) {
      instructions.push('Previous evaluations were inconsistent. Be especially careful and thorough.');
    }

    // Always give context but stress independent evaluation
    if (recentScores.length > 0) {
      const avgRecent = recentScores.reduce((a, b) => a + b, 0) / recentScores.length;
      if (confidence === 'high') {
        instructions.push(`Recent average score: ${avgRecent.toFixed(1)}/10. Use this as context but evaluate independently.`);
      } else {
        instructions.push(`Recent evaluations averaged ${avgRecent.toFixed(1)}/10. Evaluate independently based on current screenshot.`);
      }
    }

    return instructions.length > 0
      ? instructions.join('\n')
      : 'Evaluate independently, but consider previous context for consistency.';
  }

  /**
   * Get context for current decision, including variance metrics when
   * variance tracking is on (`varianceMetrics` is null otherwise).
   *
   * @returns {Object}
   */
  getContext() {
    const patterns = this.identifyPatterns();
    const varianceChange = relativeVarianceChange(this.baselineVariance, patterns.scoreVariance);

    return {
      historyLength: this.history.length,
      recentDecisions: this.history.slice(-3),
      patterns,
      varianceMetrics: this.varianceTracking ? {
        baselineVariance: this.baselineVariance,
        currentVariance: patterns.scoreVariance,
        varianceIncrease: varianceChange !== null ? varianceChange * 100 : null,
        varianceIncreaseEvents: this.varianceIncreaseEvents || [],
        adaptationEnabled: this.adaptationEnabled,
        adaptationDisabledDueToVariance: varianceChange !== null ? varianceChange > 0.2 : false
      } : null
    };
  }

  /**
   * Get variance statistics for verification.
   *
   * @returns {Object} `{ trackingEnabled: false }` when tracking is off,
   *   otherwise baseline/current variance, relative increase in percent
   *   (or null), recorded variance events, and the numeric scores in history.
   */
  getVarianceStats() {
    if (!this.varianceTracking) {
      return { trackingEnabled: false };
    }

    const patterns = this.identifyPatterns();
    const varianceChange = relativeVarianceChange(this.baselineVariance, patterns.scoreVariance);

    return {
      trackingEnabled: true,
      baselineVariance: this.baselineVariance,
      currentVariance: patterns.scoreVariance,
      varianceIncrease: varianceChange !== null ? varianceChange * 100 : null,
      varianceIncreaseEvents: this.varianceIncreaseEvents || [],
      adaptationEnabled: this.adaptationEnabled,
      historyLength: this.history.length,
      scores: collectNumericScores(this.history)
    };
  }
}

/** Extract the finite numeric scores from a decision history, in order. */
function collectNumericScores(history) {
  return history
    .map(d => d.score)
    .filter(s => typeof s === 'number' && Number.isFinite(s));
}

/**
 * Relative change of the current variance against the baseline, or null when
 * there is no baseline yet or the current variance is missing/zero.
 */
function relativeVarianceChange(baselineVariance, currentVariance) {
  if (baselineVariance == null || !currentVariance) {
    return null;
  }
  return (currentVariance - baselineVariance) / baselineVariance;
}

/** Append a variance event to the context, creating the list on first use. */
function pushVarianceEvent(context, event) {
  if (!context.varianceIncreaseEvents) {
    context.varianceIncreaseEvents = [];
  }
  context.varianceIncreaseEvents.push(event);
}
|
|
554
|
+
|
|
555
|
+
/**
 * Human Perception Time Modeling
 *
 * Estimates how long a person needs for `action`, starting from a base time
 * per action type and scaling it by attention level, action complexity and
 * (optionally) persona. Research pointers from the original author: NN/g
 * 0.1s direct-manipulation threshold, Lindgaard 50ms visual-appeal decisions,
 * 200-300 words/minute reading speed.
 *
 * 'visual-appeal' is handled separately: it starts from
 * TIME_BOUNDS.MIN_PERCEPTION (80 when focused, 120 when distracted) and is
 * clamped between TIME_SCALES.VISUAL_DECISION and 200; no multipliers apply.
 *
 * @param {string} action - Action type; unknown types use TIME_SCALES.NORMAL
 * @param {Object} [context]
 * @param {Object|null} [context.persona=null] - Persona; only `name` is read
 * @param {string} [context.attentionLevel='normal']
 * @param {string} [context.actionComplexity='normal']
 * @param {number} [context.contentLength=0] - Used for 'reading'
 * @returns {number} Estimated time, at least TIME_BOUNDS.MIN_PERCEPTION for
 *   every action other than 'visual-appeal'
 */
export function humanPerceptionTime(action, context = {}) {
  validateAction(action);
  validatePerceptionContext(context);

  const {
    persona = null,
    attentionLevel = 'normal',
    actionComplexity = 'normal',
    contentLength = 0
  } = context;

  const { VISUAL_DECISION, QUICK, NORMAL, EXTENDED } = TIME_SCALES;

  // Base time per action type
  const baseByAction = {
    'page-load': NORMAL,
    'reading': calculateReadingTime(contentLength),
    'interaction': QUICK,
    'evaluation': EXTENDED,
    'scanning': QUICK,
    'visual-appeal': VISUAL_DECISION
  };

  // Visual appeal is a snap judgement: fixed values, clamped, no multipliers
  if (action === 'visual-appeal') {
    let snapTime = TIME_BOUNDS.MIN_PERCEPTION;
    if (attentionLevel === 'distracted') {
      snapTime = 120;
    } else if (attentionLevel === 'focused') {
      snapTime = 80;
    }
    return Math.max(VISUAL_DECISION, Math.min(200, snapTime));
  }

  const attentionFactor = ATTENTION_MULTIPLIERS[attentionLevel] || 1.0;
  const complexityFactor = COMPLEXITY_MULTIPLIERS[actionComplexity] || 1.0;

  let duration = baseByAction[action] || NORMAL;
  duration *= attentionFactor;
  duration *= complexityFactor;

  // Persona adjustment, keyed off substrings of the persona name:
  // power/expert users are faster, accessibility/careful users slower.
  if (persona) {
    const personaName = persona.name?.toLowerCase();
    const isFast = personaName?.includes('power') || personaName?.includes('expert');
    const isSlow = personaName?.includes('accessibility') || personaName?.includes('careful');
    if (isFast) {
      duration *= 0.8;
    } else if (isSlow) {
      duration *= 1.3;
    }
  }

  // Never report less than the minimum perception time
  return Math.max(TIME_BOUNDS.MIN_PERCEPTION, Math.round(duration));
}
|
|
627
|
+
|
|
628
|
+
/**
 * Calculate reading time based on content length.
 *
 * Converts characters to words (5 characters per word), picks a reading
 * speed in words per minute from READING_SPEEDS depending on the content
 * size, and clamps the resulting milliseconds to bounds taken from
 * TIME_BOUNDS.
 *
 * @param {number} contentLength - Content length in characters
 * @returns {number} Reading time in milliseconds
 */
function calculateReadingTime(contentLength) {
  const CHARS_PER_WORD = 5;
  const words = contentLength / CHARS_PER_WORD;

  // Short content is scanned, medium content read normally, the rest deeply
  let wordsPerMinute;
  if (words < CONTENT_THRESHOLDS.SHORT / CHARS_PER_WORD) {
    wordsPerMinute = READING_SPEEDS.SCANNING;
  } else if (words < CONTENT_THRESHOLDS.MEDIUM / CHARS_PER_WORD) {
    wordsPerMinute = READING_SPEEDS.NORMAL;
  } else {
    wordsPerMinute = READING_SPEEDS.DEEP;
  }

  const rawMilliseconds = (words / wordsPerMinute) * 60 * 1000;

  // Lower bound depends on whether the content is short; upper bound on
  // whether it is long.
  const lowerBound = contentLength < CONTENT_THRESHOLDS.SHORT
    ? TIME_BOUNDS.MIN_READING_SHORT
    : TIME_BOUNDS.MIN_READING_LONG;
  const upperBound = contentLength > CONTENT_THRESHOLDS.LONG
    ? TIME_BOUNDS.MAX_READING_LONG
    : TIME_BOUNDS.MAX_READING_SHORT;

  const cappedMilliseconds = Math.min(upperBound, rawMilliseconds);
  return Math.max(lowerBound, cappedMilliseconds);
}
|
|
659
|
+
|
|
660
|
+
/**
 * Calculate the population variance of a list of numbers
 * (mean of squared deviations from the mean).
 *
 * @param {number[]} values
 * @returns {number} Variance, or 0 for an empty list
 */
function calculateVariance(values) {
  const count = values.length;
  if (count === 0) return 0;

  let total = 0;
  values.forEach((value) => {
    total += value;
  });
  const mean = total / count;

  let squaredDeviationSum = 0;
  values.forEach((value) => {
    squaredDeviationSum += (value - mean) ** 2;
  });

  return squaredDeviationSum / count;
}
|
|
669
|
+
|