@machinespirits/eval 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/components/MobileEvalDashboard.tsx +267 -0
  2. package/components/comparison/DeltaAnalysisTable.tsx +137 -0
  3. package/components/comparison/ProfileComparisonCard.tsx +176 -0
  4. package/components/comparison/RecognitionABMode.tsx +385 -0
  5. package/components/comparison/RecognitionMetricsPanel.tsx +135 -0
  6. package/components/comparison/WinnerIndicator.tsx +64 -0
  7. package/components/comparison/index.ts +5 -0
  8. package/components/mobile/BottomSheet.tsx +233 -0
  9. package/components/mobile/DimensionBreakdown.tsx +210 -0
  10. package/components/mobile/DocsView.tsx +363 -0
  11. package/components/mobile/LogsView.tsx +481 -0
  12. package/components/mobile/PsychodynamicQuadrant.tsx +261 -0
  13. package/components/mobile/QuickTestView.tsx +1098 -0
  14. package/components/mobile/RecognitionTypeChart.tsx +124 -0
  15. package/components/mobile/RecognitionView.tsx +809 -0
  16. package/components/mobile/RunDetailView.tsx +261 -0
  17. package/components/mobile/RunHistoryView.tsx +367 -0
  18. package/components/mobile/ScoreRadial.tsx +211 -0
  19. package/components/mobile/StreamingLogPanel.tsx +230 -0
  20. package/components/mobile/SynthesisStrategyChart.tsx +140 -0
  21. package/config/interaction-eval-scenarios.yaml +832 -0
  22. package/config/learner-agents.yaml +248 -0
  23. package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +52 -0
  24. package/docs/research/ABLATION-MODEL-SELECTION.md +53 -0
  25. package/docs/research/ADVANCED-EVAL-ANALYSIS.md +60 -0
  26. package/docs/research/ANOVA-RESULTS-2026-01-14.md +257 -0
  27. package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +586 -0
  28. package/docs/research/COST-ANALYSIS.md +56 -0
  29. package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +340 -0
  30. package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +291 -0
  31. package/docs/research/EVAL-SYSTEM-ANALYSIS.md +306 -0
  32. package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +301 -0
  33. package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +1988 -0
  34. package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +282 -0
  35. package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +147 -0
  36. package/docs/research/PAPER-EXTENSION-DYADIC.md +204 -0
  37. package/docs/research/PAPER-UNIFIED.md +659 -0
  38. package/docs/research/PAPER-UNIFIED.pdf +0 -0
  39. package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +356 -0
  40. package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +419 -0
  41. package/docs/research/apa.csl +2133 -0
  42. package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +1637 -0
  43. package/docs/research/archive/paper-multiagent-tutor.tex +978 -0
  44. package/docs/research/paper-draft/full-paper.md +136 -0
  45. package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
  46. package/docs/research/paper-draft/references.bib +515 -0
  47. package/docs/research/transcript-baseline.md +139 -0
  48. package/docs/research/transcript-recognition-multiagent.md +187 -0
  49. package/hooks/useEvalData.ts +625 -0
  50. package/index.js +27 -0
  51. package/package.json +73 -0
  52. package/routes/evalRoutes.js +3002 -0
  53. package/scripts/advanced-eval-analysis.js +351 -0
  54. package/scripts/analyze-eval-costs.js +378 -0
  55. package/scripts/analyze-eval-results.js +513 -0
  56. package/scripts/analyze-interaction-evals.js +368 -0
  57. package/server-init.js +45 -0
  58. package/server.js +162 -0
  59. package/services/benchmarkService.js +1892 -0
  60. package/services/evaluationRunner.js +739 -0
  61. package/services/evaluationStore.js +1121 -0
  62. package/services/learnerConfigLoader.js +385 -0
  63. package/services/learnerTutorInteractionEngine.js +857 -0
  64. package/services/memory/learnerMemoryService.js +1227 -0
  65. package/services/memory/learnerWritingPad.js +577 -0
  66. package/services/memory/tutorWritingPad.js +674 -0
  67. package/services/promptRecommendationService.js +493 -0
  68. package/services/rubricEvaluator.js +826 -0
@@ -0,0 +1,233 @@
1
+ /**
2
+ * BottomSheet Component
3
+ *
4
+ * A premium mobile-native bottom sheet modal with drag-to-dismiss gesture support.
5
+ * Features glass morphism, smooth animations, and haptic feedback.
6
+ * Used for displaying run details, dialogue entries, and other drill-down content.
7
+ */
8
+
9
+ import React, { useState, useRef, useCallback, useEffect } from 'react';
10
+ import haptics from '../../utils/haptics';
11
+
12
/**
 * Props accepted by the BottomSheet component.
 */
interface BottomSheetProps {
  /** Whether the sheet is shown; while false the component renders nothing. */
  isOpen: boolean;

  /**
   * Invoked after a dismiss action (backdrop tap, close button, or a
   * drag/flick past the close threshold) once the closing delay has elapsed.
   */
  onClose: () => void;

  /** Content rendered inside the scrollable body of the sheet. */
  children: React.ReactNode;

  /** Heading text; the header row (including the close button) is only rendered when set. */
  title?: string;

  /** Secondary line shown beneath the title; only rendered as part of the header row. */
  subtitle?: string;

  /** CSS `max-height` applied to the sheet container. The component defaults it to `'85vh'`. */
  maxHeight?: string;
}
20
+
21
/** Delay (ms) between a dismiss action and the `onClose` callback, leaving room for the closing transition. */
const CLOSE_ANIMATION_MS = 200;
/** Displayed drag offset (px) beyond which releasing the handle closes the sheet. */
const CLOSE_DISTANCE_PX = 100;
/** Downward velocity (px per ms) treated as a flick-to-close gesture. */
const FLICK_VELOCITY_PX_PER_MS = 0.5;
/** Minimum displayed drag offset (px) required before a flick may close the sheet. */
const FLICK_MIN_DISTANCE_PX = 30;

/**
 * Modal bottom sheet with a drag-to-dismiss handle.
 *
 * The sheet can be dismissed by tapping the backdrop, pressing the close
 * button (shown only when `title` is set), or dragging the handle downwards
 * past a distance/velocity threshold. Every dismiss path goes through a single
 * scheduler, so `onClose` fires at most once per dismissal, after
 * `CLOSE_ANIMATION_MS`. A pending close is cancelled if the component unmounts.
 *
 * While open, body scrolling is disabled; the previous inline `overflow` value
 * is restored when the sheet closes or unmounts.
 *
 * @param isOpen    When false, nothing is rendered.
 * @param onClose   Called once the closing delay has elapsed.
 * @param title     Optional heading; enables the header row and close button.
 * @param subtitle  Optional secondary line under the title.
 * @param children  Scrollable sheet content.
 * @param maxHeight CSS max-height of the sheet (default `'85vh'`); the
 *                  scrollable content area is sized relative to this value.
 */
export const BottomSheet: React.FC<BottomSheetProps> = ({
  isOpen,
  onClose,
  title,
  subtitle,
  children,
  maxHeight = '85vh'
}) => {
  const [dragY, setDragY] = useState(0);
  const [isDragging, setIsDragging] = useState(false);
  const [isClosing, setIsClosing] = useState(false);
  const dragStartY = useRef<number | null>(null);
  const velocityY = useRef(0);
  const lastY = useRef(0);
  const lastTime = useRef(Date.now());
  const sheetRef = useRef<HTMLDivElement>(null);
  // Handle of the pending close timeout, or null when no close is scheduled.
  const closeTimer = useRef<ReturnType<typeof setTimeout> | null>(null);

  // Reset drag state when sheet opens
  useEffect(() => {
    if (isOpen) {
      setDragY(0);
      setIsDragging(false);
      setIsClosing(false);
      velocityY.current = 0;
    }
  }, [isOpen]);

  // Prevent body scroll while the sheet is open, then put back whatever
  // inline overflow value was there before instead of blindly clearing it.
  useEffect(() => {
    if (!isOpen) return undefined;
    const previousOverflow = document.body.style.overflow;
    document.body.style.overflow = 'hidden';
    return () => {
      document.body.style.overflow = previousOverflow;
    };
  }, [isOpen]);

  // Cancel a pending close if the component unmounts mid-animation.
  useEffect(() => {
    return () => {
      if (closeTimer.current !== null) {
        clearTimeout(closeTimer.current);
        closeTimer.current = null;
      }
    };
  }, []);

  // Single entry point for every dismiss path. Ignores repeated requests while
  // a close is already pending so onClose is not invoked more than once.
  const requestClose = useCallback(() => {
    if (closeTimer.current !== null) return;
    setIsClosing(true);
    closeTimer.current = setTimeout(() => {
      closeTimer.current = null;
      onClose();
      setIsClosing(false);
    }, CLOSE_ANIMATION_MS);
  }, [onClose]);

  const handleTouchStart = useCallback((e: React.TouchEvent) => {
    // Only start drag from the handle area
    const target = e.target as HTMLElement;
    const touch = e.touches[0];
    if (!touch || !target.closest('.bottom-sheet-handle')) return;

    dragStartY.current = touch.clientY;
    lastY.current = touch.clientY;
    lastTime.current = Date.now();
    setIsDragging(true);
    haptics.light();
  }, []);

  const handleTouchMove = useCallback((e: React.TouchEvent) => {
    if (!isDragging || dragStartY.current === null) return;
    const touch = e.touches[0];
    if (!touch) return;

    const currentY = touch.clientY;
    const deltaY = currentY - dragStartY.current;

    // Track instantaneous velocity (px/ms) for momentum-based closing.
    // Skipped when two events share a timestamp to avoid dividing by zero.
    const now = Date.now();
    const dt = now - lastTime.current;
    if (dt > 0) {
      velocityY.current = (currentY - lastY.current) / dt;
    }
    lastY.current = currentY;
    lastTime.current = now;

    // Only allow downward drag, with resistance growing up to 50% as the
    // finger travels further (rubber band effect).
    if (deltaY > 0) {
      const resistance = 1 - Math.min(deltaY / 500, 0.5);
      setDragY(deltaY * resistance);
    }
  }, [isDragging]);

  // Also used for touchcancel so an interrupted gesture cannot leave the
  // sheet stuck in its dragging state.
  const handleTouchEnd = useCallback(() => {
    if (!isDragging) return;

    // Close if dragged far enough OR flicked downwards with enough velocity
    const draggedFarEnough = dragY > CLOSE_DISTANCE_PX;
    const flickedDown =
      velocityY.current > FLICK_VELOCITY_PX_PER_MS && dragY > FLICK_MIN_DISTANCE_PX;

    if (draggedFarEnough || flickedDown) {
      haptics.medium();
      requestClose();
    } else {
      // Snap back
      haptics.light();
    }

    setDragY(0);
    setIsDragging(false);
    dragStartY.current = null;
    velocityY.current = 0;
  }, [isDragging, dragY, requestClose]);

  // Shared by the backdrop tap and the close button.
  const handleDismiss = useCallback(() => {
    haptics.light();
    requestClose();
  }, [requestClose]);

  if (!isOpen) return null;

  // Fade the backdrop as the sheet is dragged down
  const backdropOpacity = Math.max(0, 0.7 - dragY / 300);

  // Size the scroll area relative to the sheet's own max height, reserving
  // room for the drag handle and, when present, the header row.
  const reservedChromePx = title ? 100 : 50;
  const contentMaxHeight = `calc(${maxHeight} - ${reservedChromePx}px)`;

  return (
    <>
      {/* Backdrop with blur */}
      <div
        className="fixed inset-0 z-40 transition-all duration-300"
        style={{
          backgroundColor: `rgba(0, 0, 0, ${isClosing ? 0 : backdropOpacity})`,
          backdropFilter: isClosing ? 'none' : `blur(${Math.max(0, 4 - dragY / 50)}px)`,
        }}
        onClick={handleDismiss}
      />

      {/* Sheet with glass morphism */}
      <div
        ref={sheetRef}
        className={`fixed left-0 right-0 bottom-0 z-50
          bg-gray-900/95 backdrop-blur-xl
          border-t border-white/10
          rounded-t-3xl shadow-2xl
          ${isClosing ? 'animate-slide-down' : 'animate-slide-up'}`}
        style={{
          maxHeight,
          transform: `translateY(${isClosing ? '100%' : `${dragY}px`})`,
          transition: isDragging ? 'none' : 'transform 0.3s cubic-bezier(0.32, 0.72, 0, 1)',
          paddingBottom: 'env(safe-area-inset-bottom)',
          boxShadow: '0 -10px 40px rgba(0, 0, 0, 0.3), 0 0 80px rgba(230, 57, 70, 0.05)'
        }}
        onTouchStart={handleTouchStart}
        onTouchMove={handleTouchMove}
        onTouchEnd={handleTouchEnd}
        onTouchCancel={handleTouchEnd}
      >
        {/* Drag handle - Premium styling */}
        <div className="bottom-sheet-handle flex flex-col items-center pt-4 pb-3 cursor-grab active:cursor-grabbing">
          <div className="relative">
            {/* Glow effect on drag */}
            {isDragging && (
              <div className="absolute inset-0 w-12 h-1.5 rounded-full bg-brand-red/30 blur-md" />
            )}
            <div className={`w-12 h-1.5 rounded-full transition-all duration-200
              ${isDragging ? 'bg-brand-red scale-110' : 'bg-white/20'}`}
            />
          </div>
          {/* Drag hint */}
          {dragY > 50 && (
            <span className="mt-2 text-[10px] text-gray-500 font-medium animate-fade-in">
              Release to close
            </span>
          )}
        </div>

        {/* Header with title and close button */}
        {title && (
          <div className="flex items-center justify-between px-5 pb-4 border-b border-white/5">
            <div className="flex-1 min-w-0">
              <h3 className="text-lg font-bold text-white truncate">{title}</h3>
              {subtitle && (
                <p className="text-xs text-gray-500 mt-0.5 truncate">{subtitle}</p>
              )}
            </div>
            <button
              type="button"
              onClick={handleDismiss}
              className="flex-shrink-0 ml-3 w-9 h-9 -mr-1 flex items-center justify-center
                rounded-full bg-white/5 border border-white/10
                text-gray-400 hover:text-white hover:bg-white/10
                active:scale-95 transition-all duration-150"
              aria-label="Close"
            >
              <svg className="w-5 h-5" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
                <path strokeLinecap="round" strokeLinejoin="round" d="M6 18L18 6M6 6l12 12" />
              </svg>
            </button>
          </div>
        )}

        {/* Content with scrollbar hidden */}
        <div
          className="overflow-y-auto overscroll-contain scrollbar-hide"
          style={{ maxHeight: contentMaxHeight }}
        >
          {children}
        </div>

        {/* Bottom safe area gradient */}
        <div className="absolute bottom-0 left-0 right-0 h-6 bg-gradient-to-t from-gray-900 to-transparent pointer-events-none" />
      </div>
    </>
  );
};

export default BottomSheet;
@@ -0,0 +1,210 @@
1
+ /**
2
+ * DimensionBreakdown Component
3
+ *
4
+ * Displays the 6 evaluation dimensions as horizontal progress bars.
5
+ * Premium glass morphism styling with animated transitions.
6
+ * More readable on mobile than radar charts.
7
+ */
8
+
9
+ import React from 'react';
10
+ import type { EvalDimensionScores, EvalDimensionScore } from '../../types';
11
+ import haptics from '../../utils/haptics';
12
+
13
/**
 * Props accepted by the DimensionBreakdown component.
 */
interface DimensionBreakdownProps {
  /** Per-dimension scores; each entry is looked up by the dimension's key. */
  scores: EvalDimensionScores;

  /**
   * Denser layout: tighter spacing, thinner bars, smaller text, and no
   * quality label next to the score. The component defaults this to false.
   */
  compact?: boolean;

  /**
   * When true (the component's default) the full dimension label is shown in
   * the non-compact layout; when false the short label is always used.
   */
  showLabels?: boolean;
}
18
+
19
/**
 * Static display configuration for one evaluation dimension.
 */
interface DimensionConfig {
  /** Property of `EvalDimensionScores` that holds this dimension's score. */
  key: keyof EvalDimensionScores;

  /** Full human-readable name. */
  label: string;

  /** Abbreviated name used in compact mode or when labels are turned off. */
  shortLabel: string;

  /** Background utility class for the indicator dot and the progress-bar fill. */
  color: string;

  /** Background utility class for the expandable reasoning panel. */
  bgColor: string;

  /** Shadow utility class added to the dot and bar when the score is 4 or higher. */
  glowColor: string;
}
27
+
28
/**
 * Display configuration for the six evaluation dimensions, in render order.
 *
 * NOTE(review): every class name is spelled out as a complete literal rather
 * than assembled from the key — presumably so the CSS tooling can detect the
 * classes statically; confirm against the project's Tailwind setup before
 * refactoring these into generated strings.
 */
const dimensions: DimensionConfig[] = [
  // Relevance
  {
    key: 'relevance',
    label: 'Relevance',
    shortLabel: 'REL',
    color: 'bg-dimension-relevance',
    bgColor: 'bg-dimension-relevance/20',
    glowColor: 'shadow-[0_0_10px_rgba(230,57,70,0.3)]'
  },

  // Specificity
  {
    key: 'specificity',
    label: 'Specificity',
    shortLabel: 'SPE',
    color: 'bg-dimension-specificity',
    bgColor: 'bg-dimension-specificity/20',
    glowColor: 'shadow-[0_0_10px_rgba(69,123,157,0.3)]'
  },

  // Pedagogical
  {
    key: 'pedagogical',
    label: 'Pedagogical',
    shortLabel: 'PED',
    color: 'bg-dimension-pedagogical',
    bgColor: 'bg-dimension-pedagogical/20',
    glowColor: 'shadow-[0_0_10px_rgba(42,157,143,0.3)]'
  },

  // Personalization
  {
    key: 'personalization',
    label: 'Personalization',
    shortLabel: 'PER',
    color: 'bg-dimension-personalization',
    bgColor: 'bg-dimension-personalization/20',
    glowColor: 'shadow-[0_0_10px_rgba(233,196,106,0.3)]'
  },

  // Actionability
  {
    key: 'actionability',
    label: 'Actionability',
    shortLabel: 'ACT',
    color: 'bg-dimension-actionability',
    bgColor: 'bg-dimension-actionability/20',
    glowColor: 'shadow-[0_0_10px_rgba(244,162,97,0.3)]'
  },

  // Tone
  {
    key: 'tone',
    label: 'Tone',
    shortLabel: 'TON',
    color: 'bg-dimension-tone',
    bgColor: 'bg-dimension-tone/20',
    glowColor: 'shadow-[0_0_10px_rgba(131,56,236,0.3)]'
  }
];
79
+
80
/**
 * Extracts the numeric value from a dimension score.
 *
 * A score may be a bare number or an object carrying a `score` property.
 * The extracted value is validated at runtime: anything that is not a finite
 * number (missing, `undefined`, a string, `NaN`, `Infinity`) yields `null`,
 * so callers can safely format the result with `toFixed` or use it in
 * arithmetic after a simple `!== null` check.
 *
 * @param score Raw dimension score, possibly null/undefined.
 * @returns The finite numeric score, or `null` when none is available.
 */
function getScoreValue(score: EvalDimensionScore): number | null {
  if (score === null || score === undefined) return null;

  // Pull the candidate out of the object form; otherwise use the value itself.
  const candidate: unknown =
    typeof score === 'object' && 'score' in score ? score.score : score;

  // Scores are not guaranteed to match their static type at runtime, so check
  // the actual value rather than trusting the annotation.
  return typeof candidate === 'number' && Number.isFinite(candidate) ? candidate : null;
}
87
+
88
/**
 * Returns the `reasoning` attached to a dimension score, if there is one.
 *
 * Only object-form scores that have a `reasoning` property produce a value;
 * null, undefined, bare numbers and objects without the property all yield
 * `undefined`.
 *
 * @param score Raw dimension score, possibly null/undefined.
 * @returns The reasoning value, or `undefined` when not present.
 */
function getScoreReasoning(score: EvalDimensionScore): string | undefined {
  // The nullish check must come first: `typeof null` is also 'object'.
  return score != null && typeof score === 'object' && 'reasoning' in score
    ? score.reasoning
    : undefined;
}
94
+
95
/**
 * Maps a numeric score to a qualitative label and its text colour class.
 *
 * Bands are checked from highest to lowest: 4 and above is "Excellent",
 * 3 and above "Good", 2 and above "Fair", and everything else "Needs work".
 * A null score yields an empty label with a muted colour.
 *
 * @param value Numeric score, or null when no score is available.
 * @returns The label and text colour class for the score.
 */
function getScoreQuality(value: number | null): { label: string; color: string } {
  if (value === null) {
    return { label: '', color: 'text-gray-600' };
  }

  // Ordered from the highest threshold down; the first band reached wins.
  const bands = [
    { min: 4, label: 'Excellent', color: 'text-green-400' },
    { min: 3, label: 'Good', color: 'text-blue-400' },
    { min: 2, label: 'Fair', color: 'text-yellow-400' }
  ];
  const matched = bands.find((band) => value >= band.min);

  return matched
    ? { label: matched.label, color: matched.color }
    : { label: 'Needs work', color: 'text-red-400' };
}
103
+
104
/** Upper bound of the scoring scale; scores are displayed as "x/5". */
const MAX_DIMENSION_SCORE = 5;

/**
 * Renders each configured evaluation dimension as a labelled horizontal
 * progress bar.
 *
 * Rows whose score carries reasoning act as toggles: tapping one expands a
 * panel with the reasoning text, and only one row is expanded at a time.
 * Rows without reasoning render as disabled buttons. A missing score shows
 * an em dash and an empty bar. The bar width is clamped to 0–100% so an
 * out-of-range or non-finite score cannot overflow the track.
 *
 * @param scores     Per-dimension scores, looked up by each dimension's key.
 * @param compact    Denser layout without quality labels (default false).
 * @param showLabels Show full labels in the non-compact layout (default true).
 */
export const DimensionBreakdown: React.FC<DimensionBreakdownProps> = ({
  scores,
  compact = false,
  showLabels = true
}) => {
  const [expandedDimension, setExpandedDimension] = React.useState<string | null>(null);

  return (
    <div className={compact ? 'space-y-2' : 'space-y-3'}>
      {dimensions.map((dim, index) => {
        const rawScore = scores[dim.key];
        const value = getScoreValue(rawScore);
        const reasoning = getScoreReasoning(rawScore);
        // Clamp so scores outside 0..MAX (or non-finite ones) never produce
        // an invalid or overflowing CSS width.
        const percentage =
          value !== null && Number.isFinite(value)
            ? Math.min(100, Math.max(0, (value / MAX_DIMENSION_SCORE) * 100))
            : 0;
        const isExpanded = expandedDimension === dim.key;
        const quality = getScoreQuality(value);

        return (
          <div
            key={dim.key}
            className="animate-fade-in"
            style={{ animationDelay: `${index * 50}ms` }}
          >
            <button
              type="button"
              onClick={() => {
                if (reasoning) {
                  haptics.light();
                  setExpandedDimension(isExpanded ? null : dim.key);
                }
              }}
              className={`w-full text-left transition-all duration-200 rounded-lg
                ${reasoning ? 'cursor-pointer hover:bg-white/5 active:scale-[0.99]' : 'cursor-default'}
                ${compact ? 'p-1' : 'p-1.5 -mx-1.5'}`}
              disabled={!reasoning}
              // Expose the toggle state to assistive tech only on rows that can expand.
              aria-expanded={reasoning ? isExpanded : undefined}
            >
              <div className="flex justify-between items-center mb-1.5">
                <div className="flex items-center gap-2">
                  {/* Color indicator dot */}
                  <div className={`w-2 h-2 rounded-full ${dim.color} ${value !== null && value >= 4 ? dim.glowColor : ''}`} />
                  <span className={`font-medium ${compact ? 'text-xs' : 'text-sm'} text-gray-300`}>
                    {showLabels ? (compact ? dim.shortLabel : dim.label) : dim.shortLabel}
                  </span>
                  {reasoning && (
                    <svg
                      className={`w-3 h-3 text-gray-600 transition-transform duration-200 ${isExpanded ? 'rotate-180' : ''}`}
                      fill="none"
                      viewBox="0 0 24 24"
                      stroke="currentColor"
                    >
                      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 9l-7 7-7-7" />
                    </svg>
                  )}
                </div>
                <div className="flex items-center gap-2">
                  {!compact && value !== null && (
                    <span className={`text-[10px] font-medium ${quality.color} uppercase tracking-wide`}>
                      {quality.label}
                    </span>
                  )}
                  <span className={`font-semibold tabular-nums ${compact ? 'text-xs' : 'text-sm'}`}>
                    {value !== null ? (
                      <span className="text-white">{value.toFixed(1)}<span className="text-gray-500">/{MAX_DIMENSION_SCORE}</span></span>
                    ) : (
                      <span className="text-gray-600">—</span>
                    )}
                  </span>
                </div>
              </div>

              {/* Progress bar with glass effect */}
              <div className={`bg-gray-800/60 rounded-full overflow-hidden backdrop-blur-xs ${compact ? 'h-1.5' : 'h-2.5'}`}>
                <div
                  className={`h-full rounded-full transition-all duration-700 ease-out ${dim.color}
                    ${value !== null && value >= 4 ? dim.glowColor : ''}`}
                  style={{
                    width: `${percentage}%`,
                    transitionDelay: `${index * 50}ms`
                  }}
                />
              </div>
            </button>

            {/* Expandable reasoning - Premium glass panel */}
            {isExpanded && reasoning && (
              <div className="mt-2 ml-4 animate-fade-in">
                <div className={`p-3 rounded-xl ${dim.bgColor} backdrop-blur-sm border border-white/5`}>
                  <div className="flex items-start gap-2">
                    <svg className="w-4 h-4 text-gray-400 mt-0.5 flex-shrink-0" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5}
                        d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z" />
                    </svg>
                    <p className="text-xs text-gray-300 leading-relaxed">
                      {reasoning}
                    </p>
                  </div>
                </div>
              </div>
            )}
          </div>
        );
      })}
    </div>
  );
};

export default DimensionBreakdown;