@arclabs561/ai-visual-test 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/.secretsignore.example +20 -0
  2. package/CHANGELOG.md +360 -0
  3. package/CONTRIBUTING.md +63 -0
  4. package/DEPLOYMENT.md +80 -0
  5. package/LICENSE +22 -0
  6. package/README.md +142 -0
  7. package/SECURITY.md +108 -0
  8. package/api/health.js +34 -0
  9. package/api/validate.js +252 -0
  10. package/index.d.ts +1221 -0
  11. package/package.json +112 -0
  12. package/public/index.html +149 -0
  13. package/src/batch-optimizer.mjs +451 -0
  14. package/src/bias-detector.mjs +370 -0
  15. package/src/bias-mitigation.mjs +233 -0
  16. package/src/cache.mjs +433 -0
  17. package/src/config.mjs +268 -0
  18. package/src/constants.mjs +80 -0
  19. package/src/context-compressor.mjs +350 -0
  20. package/src/convenience.mjs +617 -0
  21. package/src/cost-tracker.mjs +257 -0
  22. package/src/cross-modal-consistency.mjs +170 -0
  23. package/src/data-extractor.mjs +232 -0
  24. package/src/dynamic-few-shot.mjs +140 -0
  25. package/src/dynamic-prompts.mjs +361 -0
  26. package/src/ensemble/index.mjs +53 -0
  27. package/src/ensemble-judge.mjs +366 -0
  28. package/src/error-handler.mjs +67 -0
  29. package/src/errors.mjs +167 -0
  30. package/src/experience-propagation.mjs +128 -0
  31. package/src/experience-tracer.mjs +487 -0
  32. package/src/explanation-manager.mjs +299 -0
  33. package/src/feedback-aggregator.mjs +248 -0
  34. package/src/game-goal-prompts.mjs +478 -0
  35. package/src/game-player.mjs +548 -0
  36. package/src/hallucination-detector.mjs +155 -0
  37. package/src/helpers/playwright.mjs +80 -0
  38. package/src/human-validation-manager.mjs +516 -0
  39. package/src/index.mjs +364 -0
  40. package/src/judge.mjs +929 -0
  41. package/src/latency-aware-batch-optimizer.mjs +192 -0
  42. package/src/load-env.mjs +159 -0
  43. package/src/logger.mjs +55 -0
  44. package/src/metrics.mjs +187 -0
  45. package/src/model-tier-selector.mjs +221 -0
  46. package/src/multi-modal/index.mjs +36 -0
  47. package/src/multi-modal-fusion.mjs +190 -0
  48. package/src/multi-modal.mjs +524 -0
  49. package/src/natural-language-specs.mjs +1071 -0
  50. package/src/pair-comparison.mjs +277 -0
  51. package/src/persona/index.mjs +42 -0
  52. package/src/persona-enhanced.mjs +200 -0
  53. package/src/persona-experience.mjs +572 -0
  54. package/src/position-counterbalance.mjs +140 -0
  55. package/src/prompt-composer.mjs +375 -0
  56. package/src/render-change-detector.mjs +583 -0
  57. package/src/research-enhanced-validation.mjs +436 -0
  58. package/src/retry.mjs +152 -0
  59. package/src/rubrics.mjs +231 -0
  60. package/src/score-tracker.mjs +277 -0
  61. package/src/smart-validator.mjs +447 -0
  62. package/src/spec-config.mjs +106 -0
  63. package/src/spec-templates.mjs +347 -0
  64. package/src/specs/index.mjs +38 -0
  65. package/src/temporal/index.mjs +102 -0
  66. package/src/temporal-adaptive.mjs +163 -0
  67. package/src/temporal-batch-optimizer.mjs +222 -0
  68. package/src/temporal-constants.mjs +69 -0
  69. package/src/temporal-context.mjs +49 -0
  70. package/src/temporal-decision-manager.mjs +271 -0
  71. package/src/temporal-decision.mjs +669 -0
  72. package/src/temporal-errors.mjs +58 -0
  73. package/src/temporal-note-pruner.mjs +173 -0
  74. package/src/temporal-preprocessor.mjs +543 -0
  75. package/src/temporal-prompt-formatter.mjs +219 -0
  76. package/src/temporal-validation.mjs +159 -0
  77. package/src/temporal.mjs +415 -0
  78. package/src/type-guards.mjs +311 -0
  79. package/src/uncertainty-reducer.mjs +470 -0
  80. package/src/utils/index.mjs +175 -0
  81. package/src/validation-framework.mjs +321 -0
  82. package/src/validation-result-normalizer.mjs +64 -0
  83. package/src/validation.mjs +243 -0
  84. package/src/validators/accessibility-programmatic.mjs +345 -0
  85. package/src/validators/accessibility-validator.mjs +223 -0
  86. package/src/validators/batch-validator.mjs +143 -0
  87. package/src/validators/hybrid-validator.mjs +268 -0
  88. package/src/validators/index.mjs +34 -0
  89. package/src/validators/prompt-builder.mjs +218 -0
  90. package/src/validators/rubric.mjs +85 -0
  91. package/src/validators/state-programmatic.mjs +260 -0
  92. package/src/validators/state-validator.mjs +291 -0
  93. package/vercel.json +27 -0
@@ -0,0 +1,669 @@
1
+ /**
2
+ * Temporal Decision-Making
3
+ *
4
+ * Implements multi-scale temporal aggregation for LLM evaluations:
5
+ * - Multi-scale temporal aggregation (0.1s to 60s+)
6
+ * - Sequential decision context
7
+ * - Human perception time modeling
8
+ * - Attention-based weighting
9
+ *
10
+ * Research context:
11
+ * - Efficient Sequential Decision Making (arXiv:2406.12125) - Paper focuses on online
12
+ * model selection achieving 6x performance gain with 1.5% LLM call rate. Our implementation
13
+ * uses multi-scale temporal aggregation (inspired by temporal aspects) but does NOT
14
+ * implement the paper's core online model selection algorithm or decision logic for
15
+ * when to prompt. We cite this for temporal awareness concepts, not the core algorithm.
16
+ * - Human Time Perception (PMC research) - Human perception time scales
17
+ * - Powers of 10: Time Scales in UX (NN/g) - UX time scale research
18
+ *
19
+ * IMPORTANT: This module implements temporal aggregation and attention-based weighting,
20
+ * NOT the adaptive LLM calling strategy or decision logic from arXiv:2406.12125.
21
+ * The paper's core contribution (online model selection, when-to-prompt decisions) is
22
+ * NOT implemented here. We use temporal concepts inspired by the paper's temporal aspects.
23
+ *
24
+ * @module temporal-decision
25
+ */
26
+
27
+ import {
28
+ TIME_SCALES,
29
+ MULTI_SCALE_WINDOWS,
30
+ READING_SPEEDS,
31
+ ATTENTION_MULTIPLIERS,
32
+ COMPLEXITY_MULTIPLIERS,
33
+ CONFIDENCE_THRESHOLDS,
34
+ TIME_BOUNDS,
35
+ CONTENT_THRESHOLDS
36
+ } from './temporal-constants.mjs';
37
+ import { validateAndSortNotes, validateTimeScales, validateAction, validatePerceptionContext, validateSequentialContextOptions } from './temporal-validation.mjs';
38
+ import { MultiScaleError, PerceptionTimeError } from './temporal-errors.mjs';
39
+ import { warn, log } from './logger.mjs';
40
+
41
/**
 * Multi-scale temporal aggregation.
 *
 * Buckets notes into fixed-size windows for every configured time scale and
 * computes an (optionally attention-weighted) average score per window, plus
 * one coherence value per scale.
 *
 * @param {Array<Object>} notes - Temporal notes; validated and ordered by `validateAndSortNotes`
 * @param {Object} [options]
 * @param {Object<string, number>} [options.timeScales=MULTI_SCALE_WINDOWS] - Scale name -> window size (ms)
 * @param {boolean} [options.attentionWeights=true] - Weight each note via `calculateAttentionWeight`
 * @returns {{scales: Object, summary: string, coherence: Object}} Per-scale windows, a text summary
 *   and a scale-name -> coherence map
 */
export function aggregateMultiScale(notes, options = {}) {
  const sortedNotes = validateAndSortNotes(notes);

  const {
    timeScales = MULTI_SCALE_WINDOWS,
    attentionWeights = true
  } = options;

  validateTimeScales(timeScales);

  if (sortedNotes.length === 0) {
    return {
      scales: {},
      summary: 'No notes available',
      coherence: {}
    };
  }

  const startTime = sortedNotes[0].timestamp || Date.now();
  const scales = {};

  for (const [scaleName, windowSize] of Object.entries(timeScales)) {
    // A Map keyed by window index replaces the former sparse array: it has no
    // holes to filter out and cannot lose entries whose index is not a valid
    // array index (very large values used to become stray properties).
    const windowsByIndex = new Map();

    for (const note of sortedNotes) {
      // Prefer an explicit non-zero elapsed value, otherwise derive it from timestamps.
      let elapsed = note.elapsed || (note.timestamp - startTime);
      // A note carrying `elapsed: 0` but no timestamp would otherwise yield NaN
      // and silently disappear from every window.
      if (Number.isNaN(elapsed) && note.elapsed === 0) {
        elapsed = 0;
      }

      const windowIndex = Math.floor(elapsed / windowSize);
      // Notes without a usable position on the time axis cannot be bucketed.
      if (!Number.isInteger(windowIndex) || windowIndex < 0) {
        continue;
      }

      let bucket = windowsByIndex.get(windowIndex);
      if (!bucket) {
        bucket = {
          index: windowIndex,
          startTime: startTime + (windowIndex * windowSize),
          endTime: startTime + ((windowIndex + 1) * windowSize),
          notes: [],
          weightedScore: 0,
          totalWeight: 0
        };
        windowsByIndex.set(windowIndex, bucket);
      }

      // Attention-based weighting (1.0 when disabled)
      const weight = attentionWeights
        ? calculateAttentionWeight(note, { elapsed, windowSize, scaleName })
        : 1.0;

      bucket.notes.push({ ...note, weight });

      const score = note.gameState?.score || note.score || 0;
      bucket.weightedScore += score * weight;
      bucket.totalWeight += weight;
    }

    // Keep windows in chronological order regardless of insertion order.
    const definedWindows = [...windowsByIndex.values()].sort((a, b) => a.index - b.index);

    scales[scaleName] = {
      windowSize,
      windows: definedWindows.map(w => ({
        window: w.index,
        timeRange: `${Math.round((w.startTime - startTime) / 1000)}s-${Math.round((w.endTime - startTime) / 1000)}s`,
        avgScore: w.totalWeight > 0 ? w.weightedScore / w.totalWeight : 0,
        noteCount: w.notes.length
      })),
      coherence: calculateCoherenceForScale(definedWindows)
    };
  }

  return {
    scales,
    summary: generateMultiScaleSummary(scales),
    coherence: Object.fromEntries(
      Object.entries(scales).map(([name, scale]) => [name, scale.coherence])
    )
  };
}
124
+
125
/**
 * Calculate attention-based weight
 * Models how human attention affects temporal perception
 *
 * The weight is the product of four factors: recency (0.9 raised to the number
 * of windows elapsed), salience (see `calculateSalience`), an action bonus and
 * a novelty bonus. Keyword matching on `step` / `observation` is
 * case-insensitive and ignores non-string values.
 *
 * @param {import('./index.mjs').TemporalNote} note - Temporal note
 * @param {Object} context - Context with elapsed, windowSize, scaleName
 *   (scaleName is accepted for compatibility but does not affect the result)
 * @returns {number} Attention weight
 */
export function calculateAttentionWeight(note, context) {
  const { elapsed, windowSize } = context;

  // Base recency weight (exponential decay per elapsed window). If the ratio
  // cannot be computed, fall back to a neutral weight instead of letting NaN
  // poison every average that uses this weight.
  const windowsElapsed = elapsed / windowSize;
  const recencyWeight = Number.isNaN(windowsElapsed) ? 1.0 : Math.pow(0.9, windowsElapsed);

  // Salience weight (important events get more attention)
  const salienceWeight = calculateSalience(note);

  const step = typeof note.step === 'string' ? note.step.toLowerCase() : '';
  const observation = typeof note.observation === 'string' ? note.observation.toLowerCase() : '';

  // Action weight (user actions focus attention)
  const actionWeight = step.includes('interaction') || step.includes('click')
    ? 1.5
    : 1.0;

  // Novelty weight (context changes attract attention)
  const noveltyWeight = observation.includes('change') || observation.includes('new')
    ? 1.3
    : 1.0;

  return recencyWeight * salienceWeight * actionWeight * noveltyWeight;
}
154
+
155
/**
 * Calculate salience (importance) of a note.
 *
 * Starts at 1.0 and multiplies in:
 * - 1.5 when the score is extreme (>= 8 or <= 2),
 * - 1.2 when the note lists at least one issue,
 * - 1.3 when the observation mentions a critical keyword.
 *
 * @param {Object} note - Temporal note (reads score, gameState.score, issues, observation)
 * @returns {number} Salience multiplier (>= 1.0)
 */
function calculateSalience(note) {
  let salience = 1.0;

  // Only fall back when the score is absent: a genuine score of 0 is the most
  // extreme low value and must count as salient (`||` used to turn it into 5).
  const score = note.score ?? note.gameState?.score ?? 5;
  if (score >= 8 || score <= 2) {
    salience *= 1.5;
  }

  // Issues mentioned increase salience
  if (note.issues && note.issues.length > 0) {
    salience *= 1.2;
  }

  // Critical keywords increase salience (non-string observations are ignored)
  const criticalKeywords = ['error', 'broken', 'fail', 'critical', 'important'];
  const observation = typeof note.observation === 'string'
    ? note.observation.toLowerCase()
    : '';
  if (criticalKeywords.some(kw => observation.includes(kw))) {
    salience *= 1.3;
  }

  return salience;
}
181
+
182
/**
 * Calculate coherence for a specific time scale.
 *
 * Blends three signals computed from the per-window average scores:
 * direction consistency (0.4), stability (0.3) and variance coherence (0.3).
 * Flat steps (no change between neighbouring windows) are direction-neutral:
 * they neither count as a move upwards nor as a reversal.
 *
 * @param {Array<{weightedScore: number, totalWeight: number}>} windows - Aggregated windows
 * @returns {number} Coherence in [0, 1]; 1.0 when fewer than two usable scores exist
 */
function calculateCoherenceForScale(windows) {
  if (windows.length < 2) return 1.0;

  const clampUnit = value => Math.max(0, Math.min(1, value));

  const scores = windows
    .map(w => (w.totalWeight > 0 ? w.weightedScore / w.totalWeight : 0))
    .filter(s => Number.isFinite(s));

  // Need at least 2 scores to calculate direction
  if (scores.length < 2) return 1.0;

  // Single pass for sum, range and direction reversals. Tracking min/max here
  // avoids Math.max(...scores), which throws a RangeError on very large arrays.
  const FLAT_EPSILON = 1e-9;
  let total = scores[0];
  let minScore = scores[0];
  let maxScore = scores[0];
  let directionChanges = 0;
  let lastDirection = 0;

  for (let i = 1; i < scores.length; i++) {
    const current = scores[i];
    total += current;
    if (current < minScore) minScore = current;
    if (current > maxScore) maxScore = current;

    const delta = current - scores[i - 1];
    const direction = Math.abs(delta) <= FLAT_EPSILON ? 0 : Math.sign(delta);
    if (direction !== 0) {
      // A reversal is a change relative to the last real (non-flat) movement.
      if (lastDirection !== 0 && direction !== lastDirection) {
        directionChanges++;
      }
      lastDirection = direction;
    }
  }

  const trendCount = scores.length - 1;
  const directionConsistency = clampUnit(1.0 - (directionChanges / Math.max(1, trendCount)));

  // Variance normalisation
  const meanScore = total / scores.length;
  const variance = scores.reduce((sum, s) => sum + Math.pow(s - meanScore, 2), 0) / scores.length;

  // Use score range to determine max variance, not meanScore^2
  const scoreRange = maxScore - minScore;
  const maxVariance = Math.max(
    Math.pow(scoreRange / 2, 2),
    Math.pow(meanScore * 0.5, 2),
    10
  );
  const varianceCoherence = clampUnit(1.0 - (variance / maxVariance));

  // Stability metric
  const maxPossibleChanges = Math.max(1, scores.length - 2);
  const stability = clampUnit(1.0 - (directionChanges / maxPossibleChanges));

  // Weights: direction 0.4, stability 0.3, variance 0.3
  const coherence = directionConsistency * 0.4 + stability * 0.3 + varianceCoherence * 0.3;

  // Clamp to [0, 1]; fall back to a neutral 0.5 if the blend is not a finite number
  return clampUnit(Number.isFinite(coherence) ? coherence : 0.5);
}
236
+
237
/**
 * Generate summary across multiple time scales.
 *
 * Produces one fragment per scale showing the first and last window average
 * and the scale coherence, joined with '; '. Scales without windows, or whose
 * first/last window has no numeric `avgScore`, are skipped.
 *
 * @param {Object<string, {windowSize: number, windows: Array<{avgScore: number}>, coherence: number}>} scales
 * @returns {string} Human-readable summary (empty string when nothing can be summarised)
 */
function generateMultiScaleSummary(scales) {
  const parts = [];

  for (const [scaleName, scale] of Object.entries(scales)) {
    const windows = scale?.windows;
    if (!windows || !(windows.length > 0)) {
      continue;
    }

    const first = windows[0]?.avgScore;
    const last = windows[windows.length - 1]?.avgScore;

    // `.toFixed` exists only on numbers; a null or otherwise non-numeric
    // avgScore used to slip past the `!== undefined` check and throw.
    if (typeof first !== 'number' || typeof last !== 'number') {
      continue;
    }

    const coherence = scale.coherence ?? 0;
    parts.push(`${scaleName} scale (${scale.windowSize}ms): ${first.toFixed(1)} → ${last.toFixed(1)}, coherence: ${(coherence * 100).toFixed(0)}%`);
  }

  return parts.join('; ');
}
262
+
263
/**
 * Collect the usable numeric scores from a decision history.
 * Decisions without a finite numeric score are ignored so that a missing
 * score cannot turn variance calculations into NaN.
 */
function finiteScores(history) {
  return history
    .map(d => d.score)
    .filter(s => typeof s === 'number' && Number.isFinite(s));
}

/**
 * Relative change of the current variance against the baseline.
 * Returns null when no baseline exists yet or the current variance is
 * missing/zero. A baseline of exactly 0 yields Infinity for any positive
 * current variance, which callers treat as an increase.
 */
function relativeVarianceChange(baselineVariance, currentVariance) {
  if (baselineVariance == null || !currentVariance) {
    return null;
  }
  return (currentVariance - baselineVariance) / baselineVariance;
}

/**
 * Sequential Decision Context
 * Maintains context across LLM calls for better sequential decision-making
 *
 * Keeps a bounded history of decisions, derives simple patterns (trend,
 * recurring issues, score variance) and can append that context to a prompt.
 * Prompt adaptation is opt-in and backs off when score variance rises.
 */
export class SequentialDecisionContext {
  /**
   * @param {Object} [options]
   * @param {number} [options.maxHistory=10] - Maximum number of decisions kept
   * @param {boolean} [options.adaptationEnabled=false] - Append history context to prompts
   * @param {boolean} [options.varianceTracking=true] - Track a baseline variance and later changes
   */
  constructor(options = {}) {
    validateSequentialContextOptions(options);

    this.history = [];
    this.currentState = null;
    this.adaptations = {};
    this.maxHistory = options.maxHistory || 10;
    // NOTE: Defaults to false based on evaluation data showing sequential context increases variance.
    // Evaluation data (data-driven-analysis-1762832349830.json) shows:
    // - Isolated variance: 0.231
    // - Sequential variance: 0.324 (40% increase)
    // Users should explicitly enable adaptation if they need sequential context,
    // understanding the variance trade-off.
    this.adaptationEnabled = options.adaptationEnabled === true;
    this.varianceTracking = options.varianceTracking !== false; // Track variance by default
    this.baselineVariance = null; // Set once three scored decisions are available
    this.varianceIncreaseEvents = []; // Variance increase/decrease events recorded by adaptPrompt
  }

  /**
   * Add decision to history
   *
   * The stored entry is a copy of `decision` with `timestamp` and `index`
   * added. `index` keeps increasing after old entries are trimmed, so entries
   * never share an index.
   *
   * @param {Object} decision - Decision data (score and issues are read elsewhere in this class)
   */
  addDecision(decision) {
    const previous = this.history[this.history.length - 1];
    const index = Number.isInteger(previous?.index)
      ? previous.index + 1
      : this.history.length;

    this.history.push({
      ...decision,
      timestamp: Date.now(),
      index
    });

    // Keep only recent history
    if (this.history.length > this.maxHistory) {
      this.history.shift();
    }

    // Update current state
    this.currentState = decision;

    // Capture a baseline variance from the first few evaluations so that later
    // calls can detect whether sequential context increases variance.
    if (this.varianceTracking && this.baselineVariance === null && this.history.length >= 3) {
      const scores = finiteScores(this.history);
      if (scores.length >= 3) {
        this.baselineVariance = calculateVariance(scores);
      }
    }
  }

  /**
   * Adapt prompt based on history
   *
   * Returns `basePrompt` unchanged when adaptation is disabled, when there is
   * no history, or when variance has grown by more than 20% over the baseline.
   * Otherwise appends a history summary and adaptation instructions.
   *
   * @param {string} basePrompt - Prompt to extend
   * @param {Object} [currentContext] - Passed through to buildAdaptationInstructions
   * @returns {string} Original or adapted prompt
   */
  adaptPrompt(basePrompt, currentContext) {
    if (!this.adaptationEnabled || this.history.length === 0) {
      return basePrompt;
    }

    const patterns = this.identifyPatterns();

    const varianceChange = this.varianceTracking
      ? relativeVarianceChange(this.baselineVariance, patterns.scoreVariance)
      : null;

    if (varianceChange !== null) {
      const increased = varianceChange > 0.2; // more than 20% above baseline
      const decreased = varianceChange < -0.1; // more than 10% below baseline

      if (increased || decreased) {
        const baselineText = this.baselineVariance.toFixed(3);
        const currentText = patterns.scoreVariance.toFixed(3);

        if (increased) {
          // Always logged (not only in verbose mode): this is a critical metric
          warn(`[SequentialContext] Variance increased by ${(varianceChange * 100).toFixed(1)}% (${baselineText} → ${currentText}). Disabling adaptation to prevent further degradation.`);
        } else {
          log(`[SequentialContext] Variance decreased by ${Math.abs(varianceChange * 100).toFixed(1)}% (${baselineText} → ${currentText}). Model stability improved.`);
        }

        // Increases and decreases share one event list; decreases carry a
        // negative increasePercent and `type: 'decrease'`.
        const event = {
          timestamp: Date.now(),
          baselineVariance: this.baselineVariance,
          currentVariance: patterns.scoreVariance,
          increasePercent: varianceChange * 100,
          historyLength: this.history.length
        };
        if (decreased) {
          event.type = 'decrease';
        }
        if (!this.varianceIncreaseEvents) {
          this.varianceIncreaseEvents = [];
        }
        this.varianceIncreaseEvents.push(event);

        // Skip adaptation for this prompt only; the flag itself is not changed
        if (increased) {
          return basePrompt;
        }
      }
    }

    const historyContext = this.buildHistoryContext(patterns);

    return `${basePrompt}

## Previous Evaluation Context:
${historyContext}

## Adaptation Instructions:
${this.buildAdaptationInstructions(patterns, currentContext)}`;
  }

  /**
   * Identify patterns in decision history
   *
   * @returns {Object} Empty object when fewer than two decisions exist, otherwise
   *   `{ trend, commonIssues, isConsistent, scoreVariance, recentScores }`
   */
  identifyPatterns() {
    if (this.history.length < 2) return {};

    const scores = finiteScores(this.history);
    const issues = this.history.flatMap(d => d.issues || []);

    // Trend: compare the two most recent scores; equal scores are stable
    let trend = 'stable';
    if (scores.length >= 2) {
      const delta = scores[scores.length - 1] - scores[scores.length - 2];
      if (delta > 0) {
        trend = 'improving';
      } else if (delta < 0) {
        trend = 'declining';
      }
    }

    // Common issues: anything reported at least twice. A Map avoids clashes
    // with inherited object keys such as "constructor".
    const issueCounts = new Map();
    for (const issue of issues) {
      const key = String(issue);
      issueCounts.set(key, (issueCounts.get(key) || 0) + 1);
    }
    const commonIssues = [...issueCounts]
      .filter(([, count]) => count >= 2)
      .map(([issue]) => issue);

    // Consistency
    const scoreVariance = scores.length > 1
      ? calculateVariance(scores)
      : 0;
    const isConsistent = scoreVariance < 2.0;

    return {
      trend,
      commonIssues,
      isConsistent,
      scoreVariance,
      recentScores: scores.slice(-3)
    };
  }

  /**
   * Build history context for prompt
   *
   * Tolerates the empty pattern object returned by identifyPatterns when the
   * history holds fewer than two decisions.
   *
   * @param {Object} [patterns] - Result of identifyPatterns
   * @returns {string} Newline-separated history summary
   */
  buildHistoryContext(patterns = {}) {
    const parts = [];
    const commonIssues = patterns.commonIssues || [];

    if (this.history.length > 0) {
      const recent = this.history.slice(-3);
      parts.push(`Recent evaluations (${this.history.length} total):`);
      recent.forEach((d, i) => {
        const scoreText = typeof d.score === 'number' ? d.score.toFixed(1) : 'N/A';
        parts.push(` ${i + 1}. Score: ${scoreText}/10, Issues: ${(d.issues || []).length}`);
      });
    }

    if (patterns.trend) {
      parts.push(`Trend: ${patterns.trend}`);
    }

    if (commonIssues.length > 0) {
      parts.push(`Recurring issues: ${commonIssues.join(', ')}`);
    }

    // Only warn when inconsistency was actually measured
    if (patterns.isConsistent === false) {
      parts.push(`Warning: Inconsistent scores detected (variance: ${patterns.scoreVariance.toFixed(2)})`);
    }

    return parts.join('\n');
  }

  /**
   * Build adaptation instructions
   * Wording gets gentler as confidence drops; confidence is derived from the
   * score variance (against CONFIDENCE_THRESHOLDS) and from recurring issues.
   *
   * @param {Object} [patterns] - Result of identifyPatterns
   * @param {Object} [currentContext] - Currently unused
   * @returns {string} Newline-separated instructions (never empty)
   */
  buildAdaptationInstructions(patterns = {}, currentContext) {
    const instructions = [];
    const commonIssues = patterns.commonIssues || [];
    const recentScores = patterns.recentScores || [];

    const variance = patterns.scoreVariance || 0;
    const hasStrongPatterns = commonIssues.length > 0;
    let confidence = 'low';
    if (variance < CONFIDENCE_THRESHOLDS.HIGH_VARIANCE && hasStrongPatterns) {
      confidence = 'high';
    } else if (variance < CONFIDENCE_THRESHOLDS.MEDIUM_VARIANCE || hasStrongPatterns) {
      confidence = 'medium';
    }

    // Only add strong instructions when confidence is high
    if (patterns.trend === 'declining' && confidence === 'high') {
      instructions.push('Previous evaluations showed declining quality. Pay special attention to issues.');
    } else if (patterns.trend === 'declining' && confidence === 'medium') {
      instructions.push('Previous evaluations showed a slight decline. Consider checking for issues.');
    }

    if (hasStrongPatterns) {
      if (confidence === 'high') {
        instructions.push(`Look for these recurring issues: ${commonIssues.join(', ')}`);
      } else if (confidence === 'medium') {
        instructions.push(`These issues appeared in previous evaluations: ${commonIssues.join(', ')}. Consider checking for them.`);
      }
    }

    if (patterns.isConsistent === false) {
      instructions.push('Previous evaluations were inconsistent. Be especially careful and thorough.');
    }

    // Always provide context but emphasize independence
    if (recentScores.length > 0) {
      const avgRecent = recentScores.reduce((a, b) => a + b, 0) / recentScores.length;
      if (confidence === 'high') {
        instructions.push(`Recent average score: ${avgRecent.toFixed(1)}/10. Use this as context but evaluate independently.`);
      } else {
        instructions.push(`Recent evaluations averaged ${avgRecent.toFixed(1)}/10. Evaluate independently based on current screenshot.`);
      }
    }

    return instructions.length > 0
      ? instructions.join('\n')
      : 'Evaluate independently, but consider previous context for consistency.';
  }

  /**
   * Get context for current decision
   *
   * @returns {Object} History length, the last three decisions, current patterns and,
   *   when variance tracking is on, variance metrics (otherwise `varianceMetrics` is null)
   */
  getContext() {
    const patterns = this.identifyPatterns();
    const varianceChange = relativeVarianceChange(this.baselineVariance, patterns.scoreVariance);

    return {
      historyLength: this.history.length,
      recentDecisions: this.history.slice(-3),
      patterns,
      varianceMetrics: this.varianceTracking ? {
        baselineVariance: this.baselineVariance,
        currentVariance: patterns.scoreVariance,
        varianceIncrease: varianceChange !== null ? varianceChange * 100 : null,
        varianceIncreaseEvents: this.varianceIncreaseEvents || [],
        adaptationEnabled: this.adaptationEnabled,
        adaptationDisabledDueToVariance: varianceChange !== null && varianceChange > 0.2
      } : null
    };
  }

  /**
   * Get variance statistics for verification
   *
   * @returns {Object} `{ trackingEnabled: false }` when tracking is off, otherwise the
   *   baseline/current variance, percentage change, recorded events and the scored history
   */
  getVarianceStats() {
    if (!this.varianceTracking) {
      return { trackingEnabled: false };
    }

    const patterns = this.identifyPatterns();
    const varianceChange = relativeVarianceChange(this.baselineVariance, patterns.scoreVariance);

    return {
      trackingEnabled: true,
      baselineVariance: this.baselineVariance,
      currentVariance: patterns.scoreVariance,
      varianceIncrease: varianceChange !== null ? varianceChange * 100 : null,
      varianceIncreaseEvents: this.varianceIncreaseEvents || [],
      adaptationEnabled: this.adaptationEnabled,
      historyLength: this.history.length,
      scores: finiteScores(this.history)
    };
  }
}
554
+
555
/**
 * Human Perception Time Modeling
 *
 * Estimates how long a person needs for a given action. The estimate starts
 * from a per-action base time and is scaled by attention level, action
 * complexity and persona. Research notes carried over from the original:
 * - 0.1s threshold for direct manipulation (NN/g)
 * - 50ms for visual appeal decisions (Lindgaard research)
 * - 200-300 words/minute reading speed
 * - Attention affects temporal perception
 *
 * @param {string} action - Action name ('page-load', 'reading', 'interaction',
 *   'evaluation', 'scanning', 'visual-appeal'); other values use the normal base time
 * @param {Object} [context]
 * @param {Object|null} [context.persona=null] - Persona; only `name` is inspected
 * @param {string} [context.attentionLevel='normal'] - Key into ATTENTION_MULTIPLIERS
 * @param {string} [context.actionComplexity='normal'] - Key into COMPLEXITY_MULTIPLIERS
 * @param {number} [context.contentLength=0] - Content length passed to calculateReadingTime
 * @returns {number} Estimated time, never below TIME_BOUNDS.MIN_PERCEPTION
 *   except on the 'visual-appeal' path, which has its own bounds
 */
export function humanPerceptionTime(action, context = {}) {
  validateAction(action);
  validatePerceptionContext(context);

  const persona = context.persona === undefined ? null : context.persona;
  const attentionLevel = context.attentionLevel === undefined ? 'normal' : context.attentionLevel;
  const actionComplexity = context.actionComplexity === undefined ? 'normal' : context.actionComplexity;
  const contentLength = context.contentLength === undefined ? 0 : context.contentLength;

  // Base time per action; anything not listed falls back to the normal scale below
  const baseTimeByAction = {
    'page-load': TIME_SCALES.NORMAL,
    'reading': calculateReadingTime(contentLength),
    'interaction': TIME_SCALES.QUICK,
    'evaluation': TIME_SCALES.EXTENDED,
    'scanning': TIME_SCALES.QUICK,
    'visual-appeal': TIME_SCALES.VISUAL_DECISION
  };

  // Visual appeal is calibrated separately: a fixed time per attention level,
  // capped at 200 and floored at the visual-decision scale. Multipliers and
  // persona adjustments are not applied on this path.
  if (action === 'visual-appeal') {
    let appealTime;
    if (attentionLevel === 'distracted') {
      appealTime = 120;
    } else if (attentionLevel === 'focused') {
      appealTime = 80;
    } else {
      appealTime = TIME_BOUNDS.MIN_PERCEPTION;
    }
    return Math.max(TIME_SCALES.VISUAL_DECISION, Math.min(200, appealTime));
  }

  let estimate = baseTimeByAction[action] || TIME_SCALES.NORMAL;

  // Unknown attention/complexity keys leave the estimate unchanged
  const attentionFactor = ATTENTION_MULTIPLIERS[attentionLevel] || 1.0;
  estimate *= attentionFactor;
  const complexityFactor = COMPLEXITY_MULTIPLIERS[actionComplexity] || 1.0;
  estimate *= complexityFactor;

  // Persona adjustment, keyed on words in the persona name
  if (persona) {
    const personaName = persona.name?.toLowerCase();
    const isFastPersona = personaName?.includes('power') || personaName?.includes('expert');
    if (isFastPersona) {
      // Power users / experts are faster
      estimate *= 0.8;
    } else {
      const isCarefulPersona = personaName?.includes('accessibility') || personaName?.includes('careful');
      if (isCarefulPersona) {
        // Accessibility-focused / careful users take more time
        estimate *= 1.3;
      }
    }
  }

  // Never report less than the minimum perception time
  const rounded = Math.round(estimate);
  return Math.max(TIME_BOUNDS.MIN_PERCEPTION, rounded);
}
627
+
628
/**
 * Calculate reading time based on content length.
 *
 * Converts the length to words (about 5 characters per word), picks a reading
 * speed by content size (scanning for short, normal for medium, deep reading
 * for long content) and clamps the result to bounds that also depend on size.
 *
 * @param {number} contentLength - Content length in characters
 * @returns {number} Reading time in milliseconds, within the configured bounds
 */
function calculateReadingTime(contentLength) {
  const CHARS_PER_WORD = 5;
  const wordCount = contentLength / CHARS_PER_WORD;

  // Shorter content is scanned quickly, longer content is read more slowly
  let wordsPerMinute;
  if (wordCount < CONTENT_THRESHOLDS.SHORT / CHARS_PER_WORD) {
    wordsPerMinute = READING_SPEEDS.SCANNING;
  } else if (wordCount < CONTENT_THRESHOLDS.MEDIUM / CHARS_PER_WORD) {
    wordsPerMinute = READING_SPEEDS.NORMAL;
  } else {
    wordsPerMinute = READING_SPEEDS.DEEP;
  }

  const rawMilliseconds = (wordCount / wordsPerMinute) * 60 * 1000;

  // Lower bound depends on whether the content is short; the upper bound on
  // whether it exceeds the long threshold
  let lowerBound = TIME_BOUNDS.MIN_READING_LONG;
  if (contentLength < CONTENT_THRESHOLDS.SHORT) {
    lowerBound = TIME_BOUNDS.MIN_READING_SHORT;
  }
  let upperBound = TIME_BOUNDS.MAX_READING_SHORT;
  if (contentLength > CONTENT_THRESHOLDS.LONG) {
    upperBound = TIME_BOUNDS.MAX_READING_LONG;
  }

  const capped = Math.min(upperBound, rawMilliseconds);
  return Math.max(lowerBound, capped);
}
659
+
660
/**
 * Calculate variance (population variance: squared deviations divided by n).
 *
 * Entries that are not finite numbers are ignored, so a decision without a
 * score (e.g. `undefined`) cannot turn the result into NaN.
 *
 * @param {Array<number>} values - Sample values
 * @returns {number} Variance of the numeric entries; 0 when there are none
 */
function calculateVariance(values) {
  const samples = values.filter(v => typeof v === 'number' && Number.isFinite(v));
  if (samples.length === 0) return 0;

  const mean = samples.reduce((a, b) => a + b, 0) / samples.length;
  return samples.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / samples.length;
}
669
+