@machinespirits/eval 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68):
  1. package/components/MobileEvalDashboard.tsx +267 -0
  2. package/components/comparison/DeltaAnalysisTable.tsx +137 -0
  3. package/components/comparison/ProfileComparisonCard.tsx +176 -0
  4. package/components/comparison/RecognitionABMode.tsx +385 -0
  5. package/components/comparison/RecognitionMetricsPanel.tsx +135 -0
  6. package/components/comparison/WinnerIndicator.tsx +64 -0
  7. package/components/comparison/index.ts +5 -0
  8. package/components/mobile/BottomSheet.tsx +233 -0
  9. package/components/mobile/DimensionBreakdown.tsx +210 -0
  10. package/components/mobile/DocsView.tsx +363 -0
  11. package/components/mobile/LogsView.tsx +481 -0
  12. package/components/mobile/PsychodynamicQuadrant.tsx +261 -0
  13. package/components/mobile/QuickTestView.tsx +1098 -0
  14. package/components/mobile/RecognitionTypeChart.tsx +124 -0
  15. package/components/mobile/RecognitionView.tsx +809 -0
  16. package/components/mobile/RunDetailView.tsx +261 -0
  17. package/components/mobile/RunHistoryView.tsx +367 -0
  18. package/components/mobile/ScoreRadial.tsx +211 -0
  19. package/components/mobile/StreamingLogPanel.tsx +230 -0
  20. package/components/mobile/SynthesisStrategyChart.tsx +140 -0
  21. package/config/interaction-eval-scenarios.yaml +832 -0
  22. package/config/learner-agents.yaml +248 -0
  23. package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +52 -0
  24. package/docs/research/ABLATION-MODEL-SELECTION.md +53 -0
  25. package/docs/research/ADVANCED-EVAL-ANALYSIS.md +60 -0
  26. package/docs/research/ANOVA-RESULTS-2026-01-14.md +257 -0
  27. package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +586 -0
  28. package/docs/research/COST-ANALYSIS.md +56 -0
  29. package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +340 -0
  30. package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +291 -0
  31. package/docs/research/EVAL-SYSTEM-ANALYSIS.md +306 -0
  32. package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +301 -0
  33. package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +1988 -0
  34. package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +282 -0
  35. package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +147 -0
  36. package/docs/research/PAPER-EXTENSION-DYADIC.md +204 -0
  37. package/docs/research/PAPER-UNIFIED.md +659 -0
  38. package/docs/research/PAPER-UNIFIED.pdf +0 -0
  39. package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +356 -0
  40. package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +419 -0
  41. package/docs/research/apa.csl +2133 -0
  42. package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +1637 -0
  43. package/docs/research/archive/paper-multiagent-tutor.tex +978 -0
  44. package/docs/research/paper-draft/full-paper.md +136 -0
  45. package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
  46. package/docs/research/paper-draft/references.bib +515 -0
  47. package/docs/research/transcript-baseline.md +139 -0
  48. package/docs/research/transcript-recognition-multiagent.md +187 -0
  49. package/hooks/useEvalData.ts +625 -0
  50. package/index.js +27 -0
  51. package/package.json +73 -0
  52. package/routes/evalRoutes.js +3002 -0
  53. package/scripts/advanced-eval-analysis.js +351 -0
  54. package/scripts/analyze-eval-costs.js +378 -0
  55. package/scripts/analyze-eval-results.js +513 -0
  56. package/scripts/analyze-interaction-evals.js +368 -0
  57. package/server-init.js +45 -0
  58. package/server.js +162 -0
  59. package/services/benchmarkService.js +1892 -0
  60. package/services/evaluationRunner.js +739 -0
  61. package/services/evaluationStore.js +1121 -0
  62. package/services/learnerConfigLoader.js +385 -0
  63. package/services/learnerTutorInteractionEngine.js +857 -0
  64. package/services/memory/learnerMemoryService.js +1227 -0
  65. package/services/memory/learnerWritingPad.js +577 -0
  66. package/services/memory/tutorWritingPad.js +674 -0
  67. package/services/promptRecommendationService.js +493 -0
  68. package/services/rubricEvaluator.js +826 -0
@@ -0,0 +1,363 @@
1
+ /**
2
+ * DocsView Component
3
+ *
4
+ * Browse and read evaluation documentation.
5
+ * Displays doc list and renders markdown content.
6
+ */
7
+
8
+ import React, { useEffect, useState, useCallback } from 'react';
9
+ import type { EvalDoc } from '../../types';
10
+ import haptics from '../../utils/haptics';
11
+
12
+ interface DocsViewProps {
13
+ docs: EvalDoc[];
14
+ isLoading: boolean;
15
+ onLoadDocs: () => Promise<void>;
16
+ onLoadDocContent: (name: string) => Promise<string | null>;
17
+ }
18
+
19
/**
 * Returns true when a markdown link target may be placed in an `href`.
 *
 * Relative targets (no scheme) are allowed, as are `http:`, `https:` and
 * `mailto:`. Everything else (notably `javascript:` and `data:`) is rejected.
 */
function isSafeHref(href: string): boolean {
  // Browsers ignore ASCII whitespace/control characters while parsing a URL
  // scheme, so strip them first (defeats "java\tscript:"-style obfuscation).
  const compact = href.replace(/[\u0000-\u0020]/g, '');
  const schemeMatch = /^([a-z][a-z0-9+.-]*):/i.exec(compact);
  if (schemeMatch === null) return true; // no scheme => relative link or fragment
  const scheme = (schemeMatch[1] || '').toLowerCase();
  return scheme === 'http' || scheme === 'https' || scheme === 'mailto';
}

/**
 * Converts one line of markdown inline syntax into React nodes.
 *
 * Supports `**bold**`, `*italic*`, `` `code` `` and `[label](href)`. Plain text
 * is emitted as strings, so React escapes it; no HTML from the document is ever
 * interpreted. Bold, italic and link labels are rendered recursively; code spans
 * are emitted verbatim.
 *
 * @param text - Raw markdown text for a single line or list item.
 * @param keyPrefix - Prefix used to build stable, unique React keys.
 * @returns Nodes suitable for use as JSX children.
 */
function renderInline(text: string, keyPrefix: string): React.ReactNode[] {
  // A regex literal yields a fresh object (and lastIndex) per call, so the
  // recursive calls below cannot disturb this call's scan position.
  const token = /\*\*(.+?)\*\*|\*(.+?)\*|`([^`]+)`|\[([^\]]+)\]\(([^)]+)\)/g;
  const nodes: React.ReactNode[] = [];
  let cursor = 0;
  let match = token.exec(text);

  while (match !== null) {
    const [whole, bold, italic, code, label, href] = match;
    const key = `${keyPrefix}-${match.index}`;

    // Literal text between the previous token and this one.
    if (match.index > cursor) {
      nodes.push(text.slice(cursor, match.index));
    }

    if (bold !== undefined) {
      nodes.push(
        <strong key={key} className="text-white font-semibold">
          {renderInline(bold, key)}
        </strong>
      );
    } else if (italic !== undefined) {
      nodes.push(
        <em key={key} className="italic text-gray-200">
          {renderInline(italic, key)}
        </em>
      );
    } else if (code !== undefined) {
      nodes.push(
        <code
          key={key}
          className="bg-gray-800/60 px-1.5 py-0.5 rounded-md text-[#E63946] text-xs font-mono border border-white/5"
        >
          {code}
        </code>
      );
    } else if (label !== undefined && href !== undefined) {
      if (isSafeHref(href)) {
        nodes.push(
          <a
            key={key}
            href={href}
            className="text-[#E63946] hover:text-[#d62839] underline underline-offset-2 transition-colors"
            target="_blank"
            rel="noopener noreferrer"
          >
            {renderInline(label, key)}
          </a>
        );
      } else {
        // Unsafe scheme: keep the readable label but drop the link itself.
        nodes.push(...renderInline(label, key));
      }
    }

    cursor = match.index + whole.length;
    match = token.exec(text);
  }

  if (cursor < text.length) {
    nodes.push(text.slice(cursor));
  }
  return nodes;
}

/**
 * Simple markdown renderer for mobile - Premium glass styling.
 *
 * Handles basic formatting without heavy dependencies: fenced code blocks,
 * `#`/`##`/`###` headers, bullet and numbered lists (both rendered as bullets),
 * and paragraphs with inline bold/italic/code/links.
 *
 * Document text is never injected as HTML: raw tags and HTML entities in the
 * source are displayed literally, and links with a non-allowlisted scheme are
 * rendered as plain text.
 */
const SimpleMarkdown: React.FC<{ content: string }> = ({ content }) => {
  const renderContent = (): React.ReactNode[] => {
    // Accept both LF and CRLF documents so no stray "\r" ends up in the output.
    const lines = content.split(/\r?\n/);
    const elements: React.ReactNode[] = [];
    let inCodeBlock = false;
    let codeLines: string[] = [];
    let listItems: string[] = [];

    // Emit any accumulated list items as a single <ul>.
    const flushList = () => {
      if (listItems.length === 0) return;
      const listKey = `list-${elements.length}`;
      elements.push(
        <ul key={listKey} className="space-y-2 my-4 ml-4">
          {listItems.map((item, idx) => (
            <li key={idx} className="text-sm text-gray-300 flex items-start gap-2">
              <span className="w-1.5 h-1.5 rounded-full bg-[#E63946]/60 mt-2 flex-shrink-0" />
              <span>{renderInline(item, `${listKey}-${idx}`)}</span>
            </li>
          ))}
        </ul>
      );
      listItems = [];
    };

    // Emit the accumulated code lines as a <pre> block and leave code mode.
    const flushCode = (key: string) => {
      elements.push(
        <pre key={key} className="bg-gray-900/80 backdrop-blur-sm border border-white/5 rounded-xl p-4 overflow-x-auto my-4">
          <code className="text-xs text-gray-300 font-mono leading-relaxed">{codeLines.join('\n')}</code>
        </pre>
      );
      codeLines = [];
      inCodeBlock = false;
    };

    lines.forEach((line, i) => {
      // Code fence: toggles code mode. The language tag after ``` is ignored.
      if (line.startsWith('```')) {
        if (inCodeBlock) {
          flushCode(`code-${i}`);
        } else {
          flushList();
          inCodeBlock = true;
        }
        return;
      }

      if (inCodeBlock) {
        codeLines.push(line);
        return;
      }

      // Headers
      if (line.startsWith('# ')) {
        flushList();
        elements.push(
          <h1 key={i} className="text-xl font-bold text-white mt-8 mb-4 flex items-center gap-2">
            <span className="w-1 h-6 bg-gradient-to-b from-[#E63946] to-[#E63946]/30 rounded-full" />
            {line.slice(2)}
          </h1>
        );
        return;
      }
      if (line.startsWith('## ')) {
        flushList();
        elements.push(
          <h2 key={i} className="text-lg font-semibold text-white mt-6 mb-3 flex items-center gap-2">
            <span className="w-0.5 h-5 bg-[#E63946]/60 rounded-full" />
            {line.slice(3)}
          </h2>
        );
        return;
      }
      if (line.startsWith('### ')) {
        flushList();
        elements.push(
          <h3 key={i} className="text-base font-medium text-gray-200 mt-5 mb-2">{line.slice(4)}</h3>
        );
        return;
      }

      // List items ("- ", "* " or "1. "); accumulated until a non-list line.
      if (/^[-*]\s/.test(line)) {
        listItems.push(line.slice(2));
        return;
      }
      if (/^\d+\.\s/.test(line)) {
        listItems.push(line.replace(/^\d+\.\s/, ''));
        return;
      }

      // Empty line - flush list
      if (line.trim() === '') {
        flushList();
        return;
      }

      // Regular paragraph
      flushList();
      elements.push(
        <p key={i} className="text-sm text-gray-300 my-3 leading-relaxed">
          {renderInline(line, `p-${i}`)}
        </p>
      );
    });

    // An unterminated fence would otherwise silently drop its contents.
    if (inCodeBlock) {
      flushCode(`code-${lines.length}`);
    }
    flushList();
    return elements;
  };

  return <div className="prose-mobile">{renderContent()}</div>;
};
145
+
146
/**
 * Formats a byte count as a short human-readable size ("512 B", "1.5 KB", "3.2 MB").
 *
 * Units are binary (1 KB = 1024 B). Values at or above 1 MB are always shown in MB.
 *
 * @param bytes - Size in bytes. Non-finite or negative values are shown as "0 B".
 * @returns The formatted size string.
 */
function formatSize(bytes: number): string {
  // Guard against missing/corrupt metadata so the UI never shows "NaN MB".
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';
  if (bytes < 1024) return `${bytes} B`;

  // Decide the unit AFTER rounding: 1048575 B would otherwise render as "1024.0 KB".
  const kilobytes = (bytes / 1024).toFixed(1);
  if (Number(kilobytes) < 1024) return `${kilobytes} KB`;

  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
}
152
+
153
/**
 * Mobile documentation browser.
 *
 * Shows the list of evaluation docs and, once one is selected, its content
 * rendered through `SimpleMarkdown`.
 *
 * @param docs - Docs to list; when empty, `onLoadDocs` is invoked.
 * @param isLoading - Whether the doc list is currently being loaded.
 * @param onLoadDocs - Requests the doc list.
 * @param onLoadDocContent - Resolves the content for a doc name. A `null`
 *   result or a rejection is shown as a load failure with a retry button.
 */
export const DocsView: React.FC<DocsViewProps> = ({
  docs,
  isLoading,
  onLoadDocs,
  onLoadDocContent
}) => {
  const [selectedDoc, setSelectedDoc] = useState<EvalDoc | null>(null);
  const [docContent, setDocContent] = useState<string | null>(null);
  const [isLoadingContent, setIsLoadingContent] = useState(false);

  // Monotonic id of the most recent content request. A response whose id no
  // longer matches is stale (user went back or picked another doc) and is dropped.
  const latestRequest = React.useRef(0);

  // Invalidate any in-flight request when the component unmounts.
  useEffect(() => {
    return () => {
      latestRequest.current += 1;
    };
  }, []);

  // Load docs whenever the list is empty
  useEffect(() => {
    if (docs.length === 0) {
      onLoadDocs().catch((err) => {
        // The list simply stays empty; log so the failure is not silent.
        console.error('Failed to load docs', err);
      });
    }
  }, [docs.length, onLoadDocs]);

  // Load doc content when selected
  const handleSelectDoc = useCallback(async (doc: EvalDoc) => {
    haptics.light();
    latestRequest.current += 1;
    const requestId = latestRequest.current;

    setSelectedDoc(doc);
    setDocContent(null);
    setIsLoadingContent(true);

    let content: string | null = null;
    try {
      content = await onLoadDocContent(doc.name);
    } catch (err) {
      // Treated like a null result: the failure state with "Try again" is shown.
      console.error(`Failed to load doc "${doc.name}"`, err);
    }

    // A newer selection (or "Back") superseded this request; ignore its result.
    if (requestId !== latestRequest.current) return;

    setDocContent(content);
    setIsLoadingContent(false);
  }, [onLoadDocContent]);

  // Go back to list
  const handleBack = useCallback(() => {
    haptics.light();
    latestRequest.current += 1; // drop any response still in flight
    setSelectedDoc(null);
    setDocContent(null);
    setIsLoadingContent(false);
  }, []);

  // Document content view - Premium glass styling
  if (selectedDoc) {
    return (
      <div className="h-full flex flex-col overflow-hidden">
        {/* Header - Glass */}
        <div className="flex-shrink-0 p-4 border-b border-white/5 bg-gray-900/30 backdrop-blur-sm">
          <button
            type="button"
            onClick={handleBack}
            className="flex items-center gap-2 text-sm text-gray-400 hover:text-white transition-colors mb-3 active:scale-[0.98]"
          >
            <div className="w-7 h-7 rounded-full bg-white/5 flex items-center justify-center">
              <svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 19l-7-7 7-7" />
              </svg>
            </div>
            Back to docs
          </button>
          <h2 className="text-lg font-semibold text-white">{selectedDoc.title}</h2>
          <p className="text-xs text-gray-500 mt-1 font-mono">{selectedDoc.filename}</p>
        </div>

        {/* Content */}
        <div className="flex-1 overflow-y-auto p-4">
          {isLoadingContent && (
            <div className="flex items-center justify-center h-48">
              <div className="flex flex-col items-center gap-4">
                <div className="relative">
                  <div
                    className="absolute inset-0 rounded-full bg-gradient-to-r from-[#E63946]/20 to-[#d62839]/20 animate-spin"
                    style={{ animationDuration: '3s' }}
                  />
                  <div className="relative w-14 h-14 rounded-full bg-gray-900/80 backdrop-blur-sm border border-white/10 flex items-center justify-center">
                    <svg className="w-6 h-6 text-[#E63946] animate-spin" fill="none" viewBox="0 0 24 24">
                      <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
                      <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
                    </svg>
                  </div>
                </div>
                <span className="text-sm text-gray-400 font-medium">Loading document...</span>
              </div>
            </div>
          )}

          {/* Compare against null: an empty document ('') is valid content, not a failure */}
          {!isLoadingContent && docContent !== null && (
            <SimpleMarkdown content={docContent} />
          )}

          {!isLoadingContent && docContent === null && (
            <div className="flex flex-col items-center justify-center h-48">
              <div className="w-16 h-16 rounded-full bg-red-500/10 border border-red-500/20 flex items-center justify-center mb-4">
                <svg className="w-8 h-8 text-red-400" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                  <path
                    strokeLinecap="round"
                    strokeLinejoin="round"
                    strokeWidth={1.5}
                    d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"
                  />
                </svg>
              </div>
              <p className="text-sm text-gray-400 font-medium">Failed to load document</p>
              <button
                type="button"
                onClick={() => { void handleSelectDoc(selectedDoc); }}
                className="mt-3 px-4 py-2 text-xs bg-white/5 border border-white/10 rounded-lg text-gray-300 hover:bg-white/10 active:scale-[0.98] transition-all"
              >
                Try again
              </button>
            </div>
          )}

          {/* Bottom padding */}
          <div className="h-8" />
        </div>
      </div>
    );
  }

  // Document list view - Premium glass styling
  return (
    <div className="h-full overflow-y-auto">
      {/* Loading state - Premium animated */}
      {isLoading && docs.length === 0 && (
        <div className="flex items-center justify-center h-64">
          <div className="flex flex-col items-center gap-4">
            <div className="relative">
              <div
                className="absolute inset-0 rounded-full bg-gradient-to-r from-[#E63946]/20 to-[#d62839]/20 animate-spin"
                style={{ animationDuration: '3s' }}
              />
              <div className="relative w-16 h-16 rounded-full bg-gray-900/80 backdrop-blur-sm border border-white/10 flex items-center justify-center">
                <svg className="w-8 h-8 text-[#E63946] animate-spin" fill="none" viewBox="0 0 24 24">
                  <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
                  <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
                </svg>
              </div>
            </div>
            <span className="text-sm text-gray-400 font-medium">Loading docs...</span>
          </div>
        </div>
      )}

      {/* Empty state - Enhanced with animation */}
      {!isLoading && docs.length === 0 && (
        <div className="flex flex-col items-center justify-center h-64 text-gray-500 px-4">
          <div className="relative mb-6">
            <div
              className="absolute inset-0 rounded-full bg-gradient-to-r from-gray-600/20 via-transparent to-gray-600/20 animate-spin"
              style={{ animationDuration: '8s' }}
            />
            <div className="relative w-20 h-20 rounded-full bg-gray-900/50 backdrop-blur-sm border border-white/5 flex items-center justify-center">
              <svg className="w-10 h-10 text-gray-600" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                <path
                  strokeLinecap="round"
                  strokeLinejoin="round"
                  strokeWidth={1.5}
                  d="M12 6.253v13m0-13C10.832 5.477 9.246 5 7.5 5S4.168 5.477 3 6.253v13C4.168 18.477 5.754 18 7.5 18s3.332.477 4.5 1.253m0-13C13.168 5.477 14.754 5 16.5 5c1.747 0 3.332.477 4.5 1.253v13C19.832 18.477 18.247 18 16.5 18c-1.746 0-3.332.477-4.5 1.253"
                />
              </svg>
            </div>
          </div>
          <p className="text-sm font-medium text-gray-400">No documentation available</p>
          <p className="text-xs text-gray-600 mt-1">Add docs to the evaluation folder</p>
        </div>
      )}

      {/* Doc list - Glass cards */}
      {docs.length > 0 && (
        <div className="p-3 space-y-2">
          {docs.map((doc) => (
            <button
              key={doc.name}
              type="button"
              onClick={() => { void handleSelectDoc(doc); }}
              className="w-full p-4 text-left bg-gray-900/60 backdrop-blur-sm border border-white/5 rounded-xl active:scale-[0.99] active:bg-gray-800/80 transition-all duration-150"
            >
              <div className="flex items-start justify-between gap-3">
                <div className="flex-1 min-w-0">
                  {/* Icon and title */}
                  <div className="flex items-center gap-3 mb-2">
                    <div className="w-10 h-10 rounded-xl bg-[#E63946]/10 border border-[#E63946]/20 flex items-center justify-center flex-shrink-0">
                      <svg className="w-5 h-5 text-[#E63946]" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                        <path
                          strokeLinecap="round"
                          strokeLinejoin="round"
                          strokeWidth={2}
                          d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"
                        />
                      </svg>
                    </div>
                    <div className="min-w-0">
                      <span className="text-sm font-medium text-white line-clamp-1 block">
                        {doc.title}
                      </span>
                      {/* Filename */}
                      <span className="text-xs text-gray-500 font-mono line-clamp-1 block mt-0.5">
                        {doc.filename}
                      </span>
                    </div>
                  </div>

                  {/* Size badge */}
                  <div className="flex items-center gap-2 text-xs">
                    <span className="px-2 py-0.5 bg-white/5 border border-white/10 rounded-full text-gray-400">
                      {formatSize(doc.size)}
                    </span>
                  </div>
                </div>

                {/* Chevron */}
                <div className="w-8 h-8 rounded-full bg-white/5 flex items-center justify-center flex-shrink-0">
                  <svg className="w-4 h-4 text-gray-500" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
                  </svg>
                </div>
              </div>
            </button>
          ))}
        </div>
      )}

      {/* Bottom padding */}
      <div className="h-4" />
    </div>
  );
};
362
+
363
+ export default DocsView;