@machinespirits/eval 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +161 -0
  3. package/config/eval-settings.yaml +18 -0
  4. package/config/evaluation-rubric-learner.yaml +277 -0
  5. package/config/evaluation-rubric.yaml +613 -0
  6. package/config/interaction-eval-scenarios.yaml +93 -50
  7. package/config/learner-agents.yaml +124 -193
  8. package/config/machinespirits-eval.code-workspace +11 -0
  9. package/config/providers.yaml +60 -0
  10. package/config/suggestion-scenarios.yaml +1399 -0
  11. package/config/tutor-agents.yaml +716 -0
  12. package/docs/EVALUATION-VARIABLES.md +589 -0
  13. package/docs/REPLICATION-PLAN.md +577 -0
  14. package/index.js +15 -6
  15. package/package.json +16 -22
  16. package/routes/evalRoutes.js +88 -36
  17. package/scripts/analyze-judge-reliability.js +401 -0
  18. package/scripts/analyze-run.js +97 -0
  19. package/scripts/analyze-run.mjs +282 -0
  20. package/scripts/analyze-validation-failures.js +141 -0
  21. package/scripts/check-run.mjs +17 -0
  22. package/scripts/code-impasse-strategies.js +1132 -0
  23. package/scripts/compare-runs.js +44 -0
  24. package/scripts/compare-suggestions.js +80 -0
  25. package/scripts/compare-transformation.js +116 -0
  26. package/scripts/dig-into-run.js +158 -0
  27. package/scripts/eval-cli.js +2626 -0
  28. package/scripts/generate-paper-figures.py +452 -0
  29. package/scripts/qualitative-analysis-ai.js +1313 -0
  30. package/scripts/qualitative-analysis.js +688 -0
  31. package/scripts/seed-db.js +87 -0
  32. package/scripts/show-failed-suggestions.js +64 -0
  33. package/scripts/validate-content.js +192 -0
  34. package/server.js +3 -2
  35. package/services/__tests__/evalConfigLoader.test.js +338 -0
  36. package/services/anovaStats.js +499 -0
  37. package/services/contentResolver.js +407 -0
  38. package/services/dialogueTraceAnalyzer.js +454 -0
  39. package/services/evalConfigLoader.js +625 -0
  40. package/services/evaluationRunner.js +2171 -270
  41. package/services/evaluationStore.js +564 -29
  42. package/services/learnerConfigLoader.js +75 -5
  43. package/services/learnerRubricEvaluator.js +284 -0
  44. package/services/learnerTutorInteractionEngine.js +375 -0
  45. package/services/processUtils.js +18 -0
  46. package/services/progressLogger.js +98 -0
  47. package/services/promptRecommendationService.js +31 -26
  48. package/services/promptRewriter.js +427 -0
  49. package/services/rubricEvaluator.js +543 -70
  50. package/services/streamingReporter.js +104 -0
  51. package/services/turnComparisonAnalyzer.js +494 -0
  52. package/components/MobileEvalDashboard.tsx +0 -267
  53. package/components/comparison/DeltaAnalysisTable.tsx +0 -137
  54. package/components/comparison/ProfileComparisonCard.tsx +0 -176
  55. package/components/comparison/RecognitionABMode.tsx +0 -385
  56. package/components/comparison/RecognitionMetricsPanel.tsx +0 -135
  57. package/components/comparison/WinnerIndicator.tsx +0 -64
  58. package/components/comparison/index.ts +0 -5
  59. package/components/mobile/BottomSheet.tsx +0 -233
  60. package/components/mobile/DimensionBreakdown.tsx +0 -210
  61. package/components/mobile/DocsView.tsx +0 -363
  62. package/components/mobile/LogsView.tsx +0 -481
  63. package/components/mobile/PsychodynamicQuadrant.tsx +0 -261
  64. package/components/mobile/QuickTestView.tsx +0 -1098
  65. package/components/mobile/RecognitionTypeChart.tsx +0 -124
  66. package/components/mobile/RecognitionView.tsx +0 -809
  67. package/components/mobile/RunDetailView.tsx +0 -261
  68. package/components/mobile/RunHistoryView.tsx +0 -367
  69. package/components/mobile/ScoreRadial.tsx +0 -211
  70. package/components/mobile/StreamingLogPanel.tsx +0 -230
  71. package/components/mobile/SynthesisStrategyChart.tsx +0 -140
  72. package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +0 -52
  73. package/docs/research/ABLATION-MODEL-SELECTION.md +0 -53
  74. package/docs/research/ADVANCED-EVAL-ANALYSIS.md +0 -60
  75. package/docs/research/ANOVA-RESULTS-2026-01-14.md +0 -257
  76. package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +0 -586
  77. package/docs/research/COST-ANALYSIS.md +0 -56
  78. package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +0 -340
  79. package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +0 -291
  80. package/docs/research/EVAL-SYSTEM-ANALYSIS.md +0 -306
  81. package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +0 -301
  82. package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +0 -1988
  83. package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +0 -282
  84. package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +0 -147
  85. package/docs/research/PAPER-EXTENSION-DYADIC.md +0 -204
  86. package/docs/research/PAPER-UNIFIED.md +0 -659
  87. package/docs/research/PAPER-UNIFIED.pdf +0 -0
  88. package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +0 -356
  89. package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +0 -419
  90. package/docs/research/apa.csl +0 -2133
  91. package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +0 -1637
  92. package/docs/research/archive/paper-multiagent-tutor.tex +0 -978
  93. package/docs/research/paper-draft/full-paper.md +0 -136
  94. package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
  95. package/docs/research/paper-draft/references.bib +0 -515
  96. package/docs/research/transcript-baseline.md +0 -139
  97. package/docs/research/transcript-recognition-multiagent.md +0 -187
  98. package/hooks/useEvalData.ts +0 -625
  99. package/server-init.js +0 -45
  100. package/services/benchmarkService.js +0 -1892
  101. package/types.ts +0 -165
  102. package/utils/haptics.ts +0 -45
@@ -1,261 +0,0 @@
1
- /**
2
- * PsychodynamicQuadrant Component
3
- *
4
- * 2D scatter visualization of psychodynamic parameters:
5
- * - X-axis: superegoCompliance (0.0-1.0)
6
- * - Y-axis: recognitionSeeking (0.0-1.0)
7
- *
8
- * Four quadrants represent different tutor-learner dynamics:
9
- * - Top-right (high/high): Dialogical Recognition (ideal)
10
- * - Top-left (low/high): Permissive Responsive
11
- * - Bottom-right (high/low): Traditional Authoritarian
12
- * - Bottom-left (low/low): Disengaged
13
- */
14
-
15
- import React from 'react';
16
-
17
/**
 * A previously recorded position that is drawn as part of the trail.
 */
interface HistoricalPoint {
  /** Superego-compliance value; plotted on the X axis. */
  compliance: number;
  /** Recognition-seeking value; plotted on the Y axis. */
  seeking: number;
  /** Timestamp string carried with the sample; not read by this component. */
  timestamp: string;
}

/**
 * Props accepted by {@link PsychodynamicQuadrant}.
 */
interface PsychodynamicQuadrantProps {
  /** Current X-axis value. Expected in 0.0-1.0; plotted position is clamped to that range. */
  superegoCompliance: number;
  /** Current Y-axis value. Expected in 0.0-1.0; plotted position is clamped to that range. */
  recognitionSeeking: number;
  /** Earlier positions drawn as a faded trail. Defaults to an empty list. */
  historicalPoints?: HistoricalPoint[];
  /** Width and height of the square SVG, in SVG user units. Defaults to 200. */
  size?: number;
}

/**
 * Restrict a value to the 0-1 range used by both chart axes.
 *
 * NaN is mapped to 0 so it never reaches an SVG coordinate attribute;
 * +/-Infinity fall out of the min/max as 1 and 0 respectively.
 */
const clampToUnit = (value: number): number =>
  Number.isNaN(value) ? 0 : Math.min(1, Math.max(0, value));

/**
 * Renders a square 2D chart with superego compliance on the X axis and
 * recognition seeking on the Y axis, split at 0.5 into four labelled
 * quadrants. The current position is drawn as a highlighted marker, any
 * historical points as a faded trail, and a summary below the chart shows
 * both values as percentages together with the name of the current quadrant.
 *
 * Plotted coordinates are clamped to the chart area, so out-of-range or NaN
 * inputs can no longer push the marker or trail outside the plot. The numeric
 * read-outs and the quadrant name still use the values exactly as passed, so
 * bad upstream data stays visible. Clamping never moves a value across the
 * 0.5 boundary, which keeps the marker inside the quadrant that is named.
 */
export const PsychodynamicQuadrant: React.FC<PsychodynamicQuadrantProps> = ({
  superegoCompliance,
  recognitionSeeking,
  historicalPoints = [],
  size = 200,
}) => {
  const padding = 40;
  // Floor at 0: a negative width/height on <rect> is invalid SVG, which is
  // what a `size` smaller than twice the padding would otherwise produce.
  const chartSize = Math.max(0, size - padding * 2);

  // Convert values (0-1) to chart coordinates, clamped to the plot area.
  const toX = (value: number) => padding + clampToUnit(value) * chartSize;
  // SVG's Y axis grows downwards, so invert to put high values at the top.
  const toY = (value: number) => padding + (1 - clampToUnit(value)) * chartSize;

  // Current position
  const currentX = toX(superegoCompliance);
  const currentY = toY(recognitionSeeking);

  // Quadrant labels, each anchored at the centre of its quadrant.
  const quadrants = [
    {
      label: 'Permissive',
      sublabel: 'Responsive',
      x: padding + chartSize * 0.25,
      y: padding + chartSize * 0.25,
      color: 'text-blue-400/60',
    },
    {
      label: 'Dialogical',
      sublabel: 'Recognition',
      x: padding + chartSize * 0.75,
      y: padding + chartSize * 0.25,
      color: 'text-green-400/60',
    },
    {
      label: 'Disengaged',
      sublabel: '',
      x: padding + chartSize * 0.25,
      y: padding + chartSize * 0.75,
      color: 'text-gray-500/60',
    },
    {
      label: 'Traditional',
      sublabel: 'Authoritarian',
      x: padding + chartSize * 0.75,
      y: padding + chartSize * 0.75,
      color: 'text-red-400/60',
    },
  ];

  // Name of the quadrant the current values fall into; 0.5 belongs to the
  // "high" side on both axes.
  const getQuadrantName = () => {
    if (superegoCompliance >= 0.5 && recognitionSeeking >= 0.5) return 'Dialogical Recognition';
    if (superegoCompliance < 0.5 && recognitionSeeking >= 0.5) return 'Permissive Responsive';
    if (superegoCompliance >= 0.5 && recognitionSeeking < 0.5) return 'Traditional Authoritarian';
    return 'Disengaged';
  };

  return (
    <div className="bg-gray-900/60 backdrop-blur-sm border border-white/5 rounded-xl p-4">
      <div className="text-xs text-gray-400 mb-3">Psychodynamic Quadrant</div>

      <svg width={size} height={size} className="mx-auto">
        {/* Gradient definitions.
            NOTE(review): these ids are fixed strings, so every rendered
            instance declares the same ids in the document. */}
        <defs>
          <linearGradient id="quadrantBg" x1="0%" y1="0%" x2="100%" y2="100%">
            <stop offset="0%" stopColor="#3b82f6" stopOpacity="0.1" />
            <stop offset="50%" stopColor="#22c55e" stopOpacity="0.15" />
            <stop offset="100%" stopColor="#ef4444" stopOpacity="0.1" />
          </linearGradient>
          <radialGradient id="pointGlow">
            <stop offset="0%" stopColor="#E63946" stopOpacity="0.8" />
            <stop offset="100%" stopColor="#E63946" stopOpacity="0" />
          </radialGradient>
        </defs>

        {/* Chart background */}
        <rect
          x={padding}
          y={padding}
          width={chartSize}
          height={chartSize}
          fill="url(#quadrantBg)"
          rx="4"
        />

        {/* Grid lines: vertical and horizontal dividers at the 0.5 mark */}
        <line
          x1={padding + chartSize / 2}
          y1={padding}
          x2={padding + chartSize / 2}
          y2={padding + chartSize}
          stroke="white"
          strokeOpacity="0.1"
          strokeDasharray="4,4"
        />
        <line
          x1={padding}
          y1={padding + chartSize / 2}
          x2={padding + chartSize}
          y2={padding + chartSize / 2}
          stroke="white"
          strokeOpacity="0.1"
          strokeDasharray="4,4"
        />

        {/* Quadrant labels */}
        {quadrants.map((q, i) => (
          <g key={i}>
            <text
              x={q.x}
              y={q.y - 6}
              textAnchor="middle"
              className={`text-[9px] ${q.color} fill-current`}
            >
              {q.label}
            </text>
            {q.sublabel && (
              <text
                x={q.x}
                y={q.y + 6}
                textAnchor="middle"
                className={`text-[9px] ${q.color} fill-current`}
              >
                {q.sublabel}
              </text>
            )}
          </g>
        ))}

        {/* Historical trail (needs at least two points to form a line) */}
        {historicalPoints.length > 1 && (
          <polyline
            points={historicalPoints
              .map((p) => `${toX(p.compliance)},${toY(p.seeking)}`)
              .join(' ')}
            fill="none"
            stroke="#E63946"
            strokeOpacity="0.3"
            strokeWidth="1"
          />
        )}

        {/* Historical points: opacity starts at 0.2 for index 0 and rises
            with the index, staying below 0.6. Presumably the array is
            ordered oldest-first - confirm against the caller. */}
        {historicalPoints.map((point, i) => {
          const opacity = 0.2 + (i / historicalPoints.length) * 0.4;
          return (
            <circle
              key={i}
              cx={toX(point.compliance)}
              cy={toY(point.seeking)}
              r={3}
              fill="#E63946"
              fillOpacity={opacity}
            />
          );
        })}

        {/* Current position glow */}
        <circle cx={currentX} cy={currentY} r={20} fill="url(#pointGlow)" />

        {/* Current position */}
        <circle
          cx={currentX}
          cy={currentY}
          r={8}
          fill="#E63946"
          stroke="white"
          strokeWidth="2"
        />

        {/* Axis labels */}
        <text
          x={padding + chartSize / 2}
          y={size - 8}
          textAnchor="middle"
          className="text-[10px] text-gray-500 fill-current"
        >
          Superego Compliance
        </text>
        <text
          x={12}
          y={padding + chartSize / 2}
          textAnchor="middle"
          className="text-[10px] text-gray-500 fill-current"
          transform={`rotate(-90, 12, ${padding + chartSize / 2})`}
        >
          Recognition Seeking
        </text>

        {/* Axis scale markers (X axis only) */}
        <text
          x={padding}
          y={size - 8}
          textAnchor="middle"
          className="text-[8px] text-gray-600 fill-current"
        >
          0
        </text>
        <text
          x={padding + chartSize}
          y={size - 8}
          textAnchor="middle"
          className="text-[8px] text-gray-600 fill-current"
        >
          1
        </text>
      </svg>

      {/* Current values and quadrant (raw props, not the clamped plot values) */}
      <div className="mt-3 pt-3 border-t border-white/5 space-y-2">
        <div className="flex justify-between text-xs">
          <span className="text-gray-500">Compliance</span>
          <span className="text-white font-medium">
            {(superegoCompliance * 100).toFixed(0)}%
          </span>
        </div>
        <div className="flex justify-between text-xs">
          <span className="text-gray-500">Recognition</span>
          <span className="text-white font-medium">
            {(recognitionSeeking * 100).toFixed(0)}%
          </span>
        </div>
        <div className="flex justify-between text-xs pt-2 border-t border-white/5">
          <span className="text-gray-500">Quadrant</span>
          <span className="text-[#E63946] font-medium">{getQuadrantName()}</span>
        </div>
      </div>
    </div>
  );
};

export default PsychodynamicQuadrant;