@machinespirits/eval 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/LICENSE +21 -0
  2. package/README.md +161 -0
  3. package/config/eval-settings.yaml +18 -0
  4. package/config/evaluation-rubric-learner.yaml +277 -0
  5. package/config/evaluation-rubric.yaml +613 -0
  6. package/config/interaction-eval-scenarios.yaml +93 -50
  7. package/config/learner-agents.yaml +124 -193
  8. package/config/machinespirits-eval.code-workspace +11 -0
  9. package/config/providers.yaml +60 -0
  10. package/config/suggestion-scenarios.yaml +1399 -0
  11. package/config/tutor-agents.yaml +716 -0
  12. package/docs/EVALUATION-VARIABLES.md +589 -0
  13. package/docs/REPLICATION-PLAN.md +577 -0
  14. package/index.js +15 -6
  15. package/package.json +16 -22
  16. package/routes/evalRoutes.js +88 -36
  17. package/scripts/analyze-judge-reliability.js +401 -0
  18. package/scripts/analyze-run.js +97 -0
  19. package/scripts/analyze-run.mjs +282 -0
  20. package/scripts/analyze-validation-failures.js +141 -0
  21. package/scripts/check-run.mjs +17 -0
  22. package/scripts/code-impasse-strategies.js +1132 -0
  23. package/scripts/compare-runs.js +44 -0
  24. package/scripts/compare-suggestions.js +80 -0
  25. package/scripts/compare-transformation.js +116 -0
  26. package/scripts/dig-into-run.js +158 -0
  27. package/scripts/eval-cli.js +2626 -0
  28. package/scripts/generate-paper-figures.py +452 -0
  29. package/scripts/qualitative-analysis-ai.js +1313 -0
  30. package/scripts/qualitative-analysis.js +688 -0
  31. package/scripts/seed-db.js +87 -0
  32. package/scripts/show-failed-suggestions.js +64 -0
  33. package/scripts/validate-content.js +192 -0
  34. package/server.js +3 -2
  35. package/services/__tests__/evalConfigLoader.test.js +338 -0
  36. package/services/anovaStats.js +499 -0
  37. package/services/contentResolver.js +407 -0
  38. package/services/dialogueTraceAnalyzer.js +454 -0
  39. package/services/evalConfigLoader.js +625 -0
  40. package/services/evaluationRunner.js +2171 -270
  41. package/services/evaluationStore.js +564 -29
  42. package/services/learnerConfigLoader.js +75 -5
  43. package/services/learnerRubricEvaluator.js +284 -0
  44. package/services/learnerTutorInteractionEngine.js +375 -0
  45. package/services/processUtils.js +18 -0
  46. package/services/progressLogger.js +98 -0
  47. package/services/promptRecommendationService.js +31 -26
  48. package/services/promptRewriter.js +427 -0
  49. package/services/rubricEvaluator.js +543 -70
  50. package/services/streamingReporter.js +104 -0
  51. package/services/turnComparisonAnalyzer.js +494 -0
  52. package/components/MobileEvalDashboard.tsx +0 -267
  53. package/components/comparison/DeltaAnalysisTable.tsx +0 -137
  54. package/components/comparison/ProfileComparisonCard.tsx +0 -176
  55. package/components/comparison/RecognitionABMode.tsx +0 -385
  56. package/components/comparison/RecognitionMetricsPanel.tsx +0 -135
  57. package/components/comparison/WinnerIndicator.tsx +0 -64
  58. package/components/comparison/index.ts +0 -5
  59. package/components/mobile/BottomSheet.tsx +0 -233
  60. package/components/mobile/DimensionBreakdown.tsx +0 -210
  61. package/components/mobile/DocsView.tsx +0 -363
  62. package/components/mobile/LogsView.tsx +0 -481
  63. package/components/mobile/PsychodynamicQuadrant.tsx +0 -261
  64. package/components/mobile/QuickTestView.tsx +0 -1098
  65. package/components/mobile/RecognitionTypeChart.tsx +0 -124
  66. package/components/mobile/RecognitionView.tsx +0 -809
  67. package/components/mobile/RunDetailView.tsx +0 -261
  68. package/components/mobile/RunHistoryView.tsx +0 -367
  69. package/components/mobile/ScoreRadial.tsx +0 -211
  70. package/components/mobile/StreamingLogPanel.tsx +0 -230
  71. package/components/mobile/SynthesisStrategyChart.tsx +0 -140
  72. package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +0 -52
  73. package/docs/research/ABLATION-MODEL-SELECTION.md +0 -53
  74. package/docs/research/ADVANCED-EVAL-ANALYSIS.md +0 -60
  75. package/docs/research/ANOVA-RESULTS-2026-01-14.md +0 -257
  76. package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +0 -586
  77. package/docs/research/COST-ANALYSIS.md +0 -56
  78. package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +0 -340
  79. package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +0 -291
  80. package/docs/research/EVAL-SYSTEM-ANALYSIS.md +0 -306
  81. package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +0 -301
  82. package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +0 -1988
  83. package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +0 -282
  84. package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +0 -147
  85. package/docs/research/PAPER-EXTENSION-DYADIC.md +0 -204
  86. package/docs/research/PAPER-UNIFIED.md +0 -659
  87. package/docs/research/PAPER-UNIFIED.pdf +0 -0
  88. package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +0 -356
  89. package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +0 -419
  90. package/docs/research/apa.csl +0 -2133
  91. package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +0 -1637
  92. package/docs/research/archive/paper-multiagent-tutor.tex +0 -978
  93. package/docs/research/paper-draft/full-paper.md +0 -136
  94. package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
  95. package/docs/research/paper-draft/references.bib +0 -515
  96. package/docs/research/transcript-baseline.md +0 -139
  97. package/docs/research/transcript-recognition-multiagent.md +0 -187
  98. package/hooks/useEvalData.ts +0 -625
  99. package/server-init.js +0 -45
  100. package/services/benchmarkService.js +0 -1892
  101. package/types.ts +0 -165
  102. package/utils/haptics.ts +0 -45
@@ -1,363 +0,0 @@
1
- /**
2
- * DocsView Component
3
- *
4
- * Browse and read evaluation documentation.
5
- * Displays doc list and renders markdown content.
6
- */
7
-
8
- import React, { useEffect, useState, useCallback } from 'react';
9
- import type { EvalDoc } from '../../types';
10
- import haptics from '../../utils/haptics';
11
-
12
/**
 * Props accepted by the DocsView component.
 */
interface DocsViewProps {
  /** Documents rendered as cards in the list view. */
  docs: Array<EvalDoc>;
  /** While true and `docs` is empty, the list view shows its loading spinner. */
  isLoading: boolean;
  /** Called from an effect whenever `docs` is empty, to request the document list. */
  onLoadDocs: () => Promise<void>;
  /**
   * Called with a document's `name` when it is selected. A `null` result makes
   * the detail view show its "Failed to load document" state.
   */
  onLoadDocContent: (name: string) => Promise<string | null>;
}
18
-
19
// Simple markdown renderer for mobile - Premium glass styling
// Handles basic formatting without heavy dependencies

/** Replacement entities for the characters that are significant in HTML. */
const HTML_ENTITIES: Record<string, string> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
};

/** URL schemes a rendered markdown link is allowed to point at. */
const SAFE_LINK_PROTOCOLS = ['http:', 'https:', 'mailto:'];

/**
 * Escapes text so it can be embedded in an HTML string without being parsed
 * as markup.
 */
function escapeHtml(text: string): string {
  return text.replace(/[&<>"']/g, (char) => HTML_ENTITIES[char]);
}

/**
 * Decides whether an (already HTML-escaped) link target may be written into an
 * `href` attribute.
 *
 * Relative targets resolve against a placeholder base and are accepted; any
 * absolute target must use one of SAFE_LINK_PROTOCOLS, which rejects
 * `javascript:`, `data:` and similar schemes.
 */
function isSafeHref(href: string): boolean {
  // The raw text was escaped before formatting, so markup characters here can
  // only come from the bold/italic/code passes having run inside the URL.
  // Emitting that would break out of the attribute, so treat it as unsafe.
  if (/[<>"]/.test(href)) return false;
  try {
    const { protocol } = new URL(href, 'https://placeholder.invalid/');
    return SAFE_LINK_PROTOCOLS.includes(protocol);
  } catch {
    return false;
  }
}

/**
 * Converts one paragraph line of markdown into an HTML string supporting
 * bold, italic, inline code and links.
 *
 * The line is HTML-escaped first, so the only markup in the result is the
 * markup generated here. Links with an unsafe target are reduced to their
 * label text.
 */
function renderInlineHtml(line: string): string {
  return escapeHtml(line)
    // Bold
    .replace(/\*\*(.+?)\*\*/g, '<strong class="text-white font-semibold">$1</strong>')
    // Italic
    .replace(/\*(.+?)\*/g, '<em class="italic text-gray-200">$1</em>')
    // Inline code - Glass style
    .replace(/`([^`]+)`/g, '<code class="bg-gray-800/60 px-1.5 py-0.5 rounded-md text-[#E63946] text-xs font-mono border border-white/5">$1</code>')
    // Links
    .replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match: string, label: string, href: string) =>
      isSafeHref(href)
        ? `<a href="${href}" class="text-[#E63946] hover:text-[#d62839] underline underline-offset-2 transition-colors" target="_blank" rel="noopener">${label}</a>`
        : label
    );
}

/**
 * Renders a markdown string as styled React elements.
 *
 * Supported syntax: fenced code blocks, `#`/`##`/`###` headings, `-`/`*` and
 * numbered list items (all rendered as bullet lists), and paragraphs with
 * inline bold, italic, code and links. Blank lines end the current list.
 */
const SimpleMarkdown: React.FC<{ content: string }> = ({ content }) => {
  // Walk the document line by line, accumulating React elements.
  const renderContent = () => {
    const lines = content.split('\n');
    const elements: React.ReactNode[] = [];
    let inCodeBlock = false;
    let codeContent = '';
    let listItems: string[] = [];

    // Emit the pending list items (if any) as a single <ul>.
    const flushList = () => {
      if (listItems.length > 0) {
        elements.push(
          <ul key={`list-${elements.length}`} className="space-y-2 my-4 ml-4">
            {listItems.map((item, i) => (
              <li key={i} className="text-sm text-gray-300 flex items-start gap-2">
                <span className="w-1.5 h-1.5 rounded-full bg-[#E63946]/60 mt-2 flex-shrink-0" />
                <span>{item}</span>
              </li>
            ))}
          </ul>
        );
        listItems = [];
      }
    };

    // Emit the accumulated code lines as a <pre> block and leave code mode.
    const flushCodeBlock = (key: string) => {
      elements.push(
        <pre key={key} className="bg-gray-900/80 backdrop-blur-sm border border-white/5 rounded-xl p-4 overflow-x-auto my-4">
          <code className="text-xs text-gray-300 font-mono leading-relaxed">{codeContent}</code>
        </pre>
      );
      codeContent = '';
      inCodeBlock = false;
    };

    lines.forEach((line, i) => {
      // Code block handling - Glass style
      if (line.startsWith('```')) {
        if (inCodeBlock) {
          flushCodeBlock(`code-${i}`);
        } else {
          flushList();
          inCodeBlock = true;
        }
        return;
      }

      if (inCodeBlock) {
        codeContent += (codeContent ? '\n' : '') + line;
        return;
      }

      // Headers - Enhanced styling
      if (line.startsWith('# ')) {
        flushList();
        elements.push(
          <h1 key={i} className="text-xl font-bold text-white mt-8 mb-4 flex items-center gap-2">
            <span className="w-1 h-6 bg-gradient-to-b from-[#E63946] to-[#E63946]/30 rounded-full" />
            {line.slice(2)}
          </h1>
        );
        return;
      }
      if (line.startsWith('## ')) {
        flushList();
        elements.push(
          <h2 key={i} className="text-lg font-semibold text-white mt-6 mb-3 flex items-center gap-2">
            <span className="w-0.5 h-5 bg-[#E63946]/60 rounded-full" />
            {line.slice(3)}
          </h2>
        );
        return;
      }
      if (line.startsWith('### ')) {
        flushList();
        elements.push(
          <h3 key={i} className="text-base font-medium text-gray-200 mt-5 mb-2">{line.slice(4)}</h3>
        );
        return;
      }

      // List items
      if (line.match(/^[-*]\s/)) {
        listItems.push(line.slice(2));
        return;
      }
      if (line.match(/^\d+\.\s/)) {
        listItems.push(line.replace(/^\d+\.\s/, ''));
        return;
      }

      // Empty line - flush list
      if (line.trim() === '') {
        flushList();
        return;
      }

      // Regular paragraph
      flushList();

      // The HTML string is built from escaped text only, so injecting it is safe.
      elements.push(
        <p
          key={i}
          className="text-sm text-gray-300 my-3 leading-relaxed"
          dangerouslySetInnerHTML={{ __html: renderInlineHtml(line) }}
        />
      );
    });

    flushList();
    // A fence that is never closed would otherwise lose everything after it.
    if (inCodeBlock) {
      flushCodeBlock(`code-${lines.length}`);
    }
    return elements;
  };

  return <div className="prose-mobile">{renderContent()}</div>;
};
145
-
146
// Format file size

/** Unit suffixes used once a size reaches 1024 bytes, in ascending order. */
const SIZE_UNITS = ['KB', 'MB', 'GB', 'TB'];

/**
 * Formats a byte count as a short human-readable string.
 *
 * Counts below 1024 are shown verbatim with a " B" suffix. Larger counts are
 * divided by 1024 per unit step and shown with one decimal place.
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size, e.g. "512 B", "1.5 KB", "2.0 MB".
 */
function formatSize(bytes: number): string {
  if (bytes < 1024) return `${bytes} B`;

  let value = bytes / 1024;
  let unitIndex = 0;
  // Compare the rounded value, not the raw one: 1048575 bytes is 1023.999 KB,
  // which would otherwise be displayed as "1024.0 KB" instead of "1.0 MB".
  while (unitIndex < SIZE_UNITS.length - 1 && Number(value.toFixed(1)) >= 1024) {
    value /= 1024;
    unitIndex += 1;
  }
  return `${value.toFixed(1)} ${SIZE_UNITS[unitIndex]}`;
}
152
-
153
/**
 * Documentation browser with two states: a list of document cards, and a
 * detail view that loads the selected document and renders it as markdown.
 *
 * The document list is requested through `onLoadDocs` whenever `docs` is
 * empty; a document's content is requested through `onLoadDocContent` when
 * its card is tapped.
 */
export const DocsView: React.FC<DocsViewProps> = ({
  docs,
  isLoading,
  onLoadDocs,
  onLoadDocContent
}) => {
  const [selectedDoc, setSelectedDoc] = useState<EvalDoc | null>(null);
  const [docContent, setDocContent] = useState<string | null>(null);
  const [isLoadingContent, setIsLoadingContent] = useState(false);
  // Incremented for every content request (and on "back") so that a response
  // arriving late for an earlier selection can be recognised and dropped.
  const latestRequestRef = React.useRef(0);

  // Load docs on mount
  useEffect(() => {
    if (docs.length === 0) {
      onLoadDocs();
    }
  }, [docs.length, onLoadDocs]);

  // Load doc content when selected
  const handleSelectDoc = useCallback(async (doc: EvalDoc) => {
    haptics.light();
    latestRequestRef.current += 1;
    const requestId = latestRequestRef.current;
    setSelectedDoc(doc);
    setIsLoadingContent(true);

    let content: string | null = null;
    try {
      content = await onLoadDocContent(doc.name);
    } catch (err) {
      // A rejected load is shown through the "Failed to load document" state
      // (null content) instead of leaving the spinner up forever.
      console.error('Failed to load document content', err);
    }

    // Another selection (or "back") happened while this one was in flight.
    if (latestRequestRef.current !== requestId) return;

    setDocContent(content);
    setIsLoadingContent(false);
  }, [onLoadDocContent]);

  // Go back to list
  const handleBack = useCallback(() => {
    haptics.light();
    // Invalidate any request still in flight for the document being closed.
    latestRequestRef.current += 1;
    setSelectedDoc(null);
    setDocContent(null);
    setIsLoadingContent(false);
  }, []);

  // Document content view - Premium glass styling
  if (selectedDoc) {
    return (
      <div className="h-full flex flex-col overflow-hidden">
        {/* Header - Glass */}
        <div className="flex-shrink-0 p-4 border-b border-white/5 bg-gray-900/30 backdrop-blur-sm">
          <button
            type="button"
            onClick={handleBack}
            className="flex items-center gap-2 text-sm text-gray-400 hover:text-white transition-colors mb-3 active:scale-[0.98]"
          >
            <div className="w-7 h-7 rounded-full bg-white/5 flex items-center justify-center">
              <svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M15 19l-7-7 7-7" />
              </svg>
            </div>
            Back to docs
          </button>
          <h2 className="text-lg font-semibold text-white">{selectedDoc.title}</h2>
          <p className="text-xs text-gray-500 mt-1 font-mono">{selectedDoc.filename}</p>
        </div>

        {/* Content */}
        <div className="flex-1 overflow-y-auto p-4">
          {isLoadingContent && (
            <div className="flex items-center justify-center h-48">
              <div className="flex flex-col items-center gap-4">
                <div className="relative">
                  <div
                    className="absolute inset-0 rounded-full bg-gradient-to-r from-[#E63946]/20 to-[#d62839]/20 animate-spin"
                    style={{ animationDuration: '3s' }}
                  />
                  <div className="relative w-14 h-14 rounded-full bg-gray-900/80 backdrop-blur-sm border border-white/10 flex items-center justify-center">
                    <svg className="w-6 h-6 text-[#E63946] animate-spin" fill="none" viewBox="0 0 24 24">
                      <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
                      <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
                    </svg>
                  </div>
                </div>
                <span className="text-sm text-gray-400 font-medium">Loading document...</span>
              </div>
            </div>
          )}

          {!isLoadingContent && docContent && (
            <SimpleMarkdown content={docContent} />
          )}

          {!isLoadingContent && !docContent && (
            <div className="flex flex-col items-center justify-center h-48">
              <div className="w-16 h-16 rounded-full bg-red-500/10 border border-red-500/20 flex items-center justify-center mb-4">
                <svg className="w-8 h-8 text-red-400" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                  <path
                    strokeLinecap="round"
                    strokeLinejoin="round"
                    strokeWidth={1.5}
                    d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"
                  />
                </svg>
              </div>
              <p className="text-sm text-gray-400 font-medium">Failed to load document</p>
              <button
                type="button"
                onClick={() => handleSelectDoc(selectedDoc)}
                className="mt-3 px-4 py-2 text-xs bg-white/5 border border-white/10 rounded-lg text-gray-300 hover:bg-white/10 active:scale-[0.98] transition-all"
              >
                Try again
              </button>
            </div>
          )}

          {/* Bottom padding */}
          <div className="h-8" />
        </div>
      </div>
    );
  }

  // Document list view - Premium glass styling
  return (
    <div className="h-full overflow-y-auto">
      {/* Loading state - Premium animated */}
      {isLoading && docs.length === 0 && (
        <div className="flex items-center justify-center h-64">
          <div className="flex flex-col items-center gap-4">
            <div className="relative">
              <div
                className="absolute inset-0 rounded-full bg-gradient-to-r from-[#E63946]/20 to-[#d62839]/20 animate-spin"
                style={{ animationDuration: '3s' }}
              />
              <div className="relative w-16 h-16 rounded-full bg-gray-900/80 backdrop-blur-sm border border-white/10 flex items-center justify-center">
                <svg className="w-8 h-8 text-[#E63946] animate-spin" fill="none" viewBox="0 0 24 24">
                  <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
                  <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
                </svg>
              </div>
            </div>
            <span className="text-sm text-gray-400 font-medium">Loading docs...</span>
          </div>
        </div>
      )}

      {/* Empty state - Enhanced with animation */}
      {!isLoading && docs.length === 0 && (
        <div className="flex flex-col items-center justify-center h-64 text-gray-500 px-4">
          <div className="relative mb-6">
            <div
              className="absolute inset-0 rounded-full bg-gradient-to-r from-gray-600/20 via-transparent to-gray-600/20 animate-spin"
              style={{ animationDuration: '8s' }}
            />
            <div className="relative w-20 h-20 rounded-full bg-gray-900/50 backdrop-blur-sm border border-white/5 flex items-center justify-center">
              <svg className="w-10 h-10 text-gray-600" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                <path
                  strokeLinecap="round"
                  strokeLinejoin="round"
                  strokeWidth={1.5}
                  d="M12 6.253v13m0-13C10.832 5.477 9.246 5 7.5 5S4.168 5.477 3 6.253v13C4.168 18.477 5.754 18 7.5 18s3.332.477 4.5 1.253m0-13C13.168 5.477 14.754 5 16.5 5c1.747 0 3.332.477 4.5 1.253v13C19.832 18.477 18.247 18 16.5 18c-1.746 0-3.332.477-4.5 1.253"
                />
              </svg>
            </div>
          </div>
          <p className="text-sm font-medium text-gray-400">No documentation available</p>
          <p className="text-xs text-gray-600 mt-1">Add docs to the evaluation folder</p>
        </div>
      )}

      {/* Doc list - Glass cards */}
      {docs.length > 0 && (
        <div className="p-3 space-y-2">
          {docs.map((doc) => (
            <button
              key={doc.name}
              type="button"
              onClick={() => handleSelectDoc(doc)}
              className="w-full p-4 text-left bg-gray-900/60 backdrop-blur-sm border border-white/5 rounded-xl active:scale-[0.99] active:bg-gray-800/80 transition-all duration-150"
            >
              <div className="flex items-start justify-between gap-3">
                <div className="flex-1 min-w-0">
                  {/* Icon and title */}
                  <div className="flex items-center gap-3 mb-2">
                    <div className="w-10 h-10 rounded-xl bg-[#E63946]/10 border border-[#E63946]/20 flex items-center justify-center flex-shrink-0">
                      <svg className="w-5 h-5 text-[#E63946]" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                        <path
                          strokeLinecap="round"
                          strokeLinejoin="round"
                          strokeWidth={2}
                          d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"
                        />
                      </svg>
                    </div>
                    <div className="min-w-0">
                      <span className="text-sm font-medium text-white line-clamp-1 block">
                        {doc.title}
                      </span>
                      {/* Filename */}
                      <span className="text-xs text-gray-500 font-mono line-clamp-1 block mt-0.5">
                        {doc.filename}
                      </span>
                    </div>
                  </div>

                  {/* Size badge */}
                  <div className="flex items-center gap-2 text-xs">
                    <span className="px-2 py-0.5 bg-white/5 border border-white/10 rounded-full text-gray-400">
                      {formatSize(doc.size)}
                    </span>
                  </div>
                </div>

                {/* Chevron */}
                <div className="w-8 h-8 rounded-full bg-white/5 flex items-center justify-center flex-shrink-0">
                  <svg className="w-4 h-4 text-gray-500" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
                  </svg>
                </div>
              </div>
            </button>
          ))}
        </div>
      )}

      {/* Bottom padding */}
      <div className="h-4" />
    </div>
  );
};
362
-
363
- export default DocsView;