@machinespirits/eval 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/components/MobileEvalDashboard.tsx +267 -0
- package/components/comparison/DeltaAnalysisTable.tsx +137 -0
- package/components/comparison/ProfileComparisonCard.tsx +176 -0
- package/components/comparison/RecognitionABMode.tsx +385 -0
- package/components/comparison/RecognitionMetricsPanel.tsx +135 -0
- package/components/comparison/WinnerIndicator.tsx +64 -0
- package/components/comparison/index.ts +5 -0
- package/components/mobile/BottomSheet.tsx +233 -0
- package/components/mobile/DimensionBreakdown.tsx +210 -0
- package/components/mobile/DocsView.tsx +363 -0
- package/components/mobile/LogsView.tsx +481 -0
- package/components/mobile/PsychodynamicQuadrant.tsx +261 -0
- package/components/mobile/QuickTestView.tsx +1098 -0
- package/components/mobile/RecognitionTypeChart.tsx +124 -0
- package/components/mobile/RecognitionView.tsx +809 -0
- package/components/mobile/RunDetailView.tsx +261 -0
- package/components/mobile/RunHistoryView.tsx +367 -0
- package/components/mobile/ScoreRadial.tsx +211 -0
- package/components/mobile/StreamingLogPanel.tsx +230 -0
- package/components/mobile/SynthesisStrategyChart.tsx +140 -0
- package/config/interaction-eval-scenarios.yaml +832 -0
- package/config/learner-agents.yaml +248 -0
- package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +52 -0
- package/docs/research/ABLATION-MODEL-SELECTION.md +53 -0
- package/docs/research/ADVANCED-EVAL-ANALYSIS.md +60 -0
- package/docs/research/ANOVA-RESULTS-2026-01-14.md +257 -0
- package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +586 -0
- package/docs/research/COST-ANALYSIS.md +56 -0
- package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +340 -0
- package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +291 -0
- package/docs/research/EVAL-SYSTEM-ANALYSIS.md +306 -0
- package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +301 -0
- package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +1988 -0
- package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +282 -0
- package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +147 -0
- package/docs/research/PAPER-EXTENSION-DYADIC.md +204 -0
- package/docs/research/PAPER-UNIFIED.md +659 -0
- package/docs/research/PAPER-UNIFIED.pdf +0 -0
- package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +356 -0
- package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +419 -0
- package/docs/research/apa.csl +2133 -0
- package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +1637 -0
- package/docs/research/archive/paper-multiagent-tutor.tex +978 -0
- package/docs/research/paper-draft/full-paper.md +136 -0
- package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
- package/docs/research/paper-draft/references.bib +515 -0
- package/docs/research/transcript-baseline.md +139 -0
- package/docs/research/transcript-recognition-multiagent.md +187 -0
- package/hooks/useEvalData.ts +625 -0
- package/index.js +27 -0
- package/package.json +73 -0
- package/routes/evalRoutes.js +3002 -0
- package/scripts/advanced-eval-analysis.js +351 -0
- package/scripts/analyze-eval-costs.js +378 -0
- package/scripts/analyze-eval-results.js +513 -0
- package/scripts/analyze-interaction-evals.js +368 -0
- package/server-init.js +45 -0
- package/server.js +162 -0
- package/services/benchmarkService.js +1892 -0
- package/services/evaluationRunner.js +739 -0
- package/services/evaluationStore.js +1121 -0
- package/services/learnerConfigLoader.js +385 -0
- package/services/learnerTutorInteractionEngine.js +857 -0
- package/services/memory/learnerMemoryService.js +1227 -0
- package/services/memory/learnerWritingPad.js +577 -0
- package/services/memory/tutorWritingPad.js +674 -0
- package/services/promptRecommendationService.js +493 -0
- package/services/rubricEvaluator.js +826 -0
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RecognitionABMode Component
|
|
3
|
+
*
|
|
4
|
+
* Main controller for Recognition A/B comparison mode.
|
|
5
|
+
* - Manages SSE streaming to /api/eval/stream/recognition-ab
|
|
6
|
+
* - Displays side-by-side baseline vs recognition results
|
|
7
|
+
* - Shows delta analysis with winner indicators
|
|
8
|
+
* - Recognition-specific metrics panel for treatment profile
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import React, { useState, useRef, useCallback } from 'react';
|
|
12
|
+
import { ProfileComparisonCard } from './ProfileComparisonCard';
|
|
13
|
+
import { DeltaAnalysisTable } from './DeltaAnalysisTable';
|
|
14
|
+
import { RecognitionMetricsPanel } from './RecognitionMetricsPanel';
|
|
15
|
+
import { WinnerIndicator } from './WinnerIndicator';
|
|
16
|
+
|
|
17
|
+
/** Significance marker attached to a comparison; the empty string means "no marker". */
type SignificanceMark = '' | '*' | '**';

/** Profile that came out ahead in a comparison, or `null` when there is no winner. */
type WinningProfile = 'baseline' | 'recognition' | null;

/**
 * One row of the per-dimension delta analysis delivered by the `complete` event.
 */
interface DeltaEntry {
  /** Name of the scored dimension this row describes. */
  dimension: string;
  /** Baseline score for the dimension, or `null` when unavailable. */
  baseline: number | null;
  /** Recognition score for the dimension, or `null` when unavailable. */
  recognition: number | null;
  /** Score difference. NOTE(review): presumably recognition minus baseline - confirm against the server. */
  delta: number;
  /** The difference expressed as a percentage. */
  deltaPercent: number;
  significance: SignificanceMark;
  winner: WinningProfile;
}

/**
 * Average score per rubric dimension for a single profile.
 * Every dimension is `null` until a value is available.
 */
interface DimensionAverages {
  relevance: number | null;
  specificity: number | null;
  pedagogical: number | null;
  personalization: number | null;
  actionability: number | null;
  tone: number | null;
}

/** Counts per synthesis strategy. */
interface SynthesisStrategies {
  ghost_dominates: number;
  learner_dominates: number;
  dialectical_synthesis: number;
}

/** Headline scores for both profiles plus the overall verdict. */
interface OverallScores {
  baseline: number | null;
  recognition: number | null;
  /** Overall score difference, or `null` when it cannot be computed. */
  delta: number | null;
  significance: SignificanceMark;
  winner: WinningProfile;
}

/** Metrics shown in the recognition metrics panel. */
interface RecognitionMetrics {
  momentsGenerated: number;
  /** Rendered by the metrics panel as a percentage after multiplying by 100. */
  avgDialecticalDepth: number;
  synthesisStrategies: SynthesisStrategies;
}

/** A single line in the streaming log panel. */
interface LogEntry {
  message: string;
  /** The log panel styles 'error', 'warning' and 'success'; any other value uses the default colour. */
  level: string;
  timestamp?: string;
}

/** Outcome of one scenario executed under one profile. */
interface ResultEntry {
  /** Results are grouped by comparing this against 'baseline' and 'recognition'. */
  profile: string;
  scenarioId: string;
  scenarioName: string;
  passed: boolean;
  score: number | null;
  latencyMs: number;
}

/** Props accepted by the RecognitionABMode component. */
interface RecognitionABModeProps {
  /** Invoked with the run id carried by the stream's `complete` event. */
  onRunComplete?: (runId: string) => void;
}
|
|
74
|
+
|
|
75
|
+
/** Upper bound on the number of log entries kept in component state. */
const MAX_LOG_ENTRIES = 100;

/** Number of most recent log entries rendered in the collapsible log panel. */
const VISIBLE_LOG_ENTRIES = 30;

/** Server-sent-events endpoint that streams the A/B comparison. */
const RECOGNITION_AB_STREAM_URL = '/api/eval/stream/recognition-ab';

/** All-null dimension scores handed to the profile cards until averages arrive. */
const EMPTY_DIMENSION_AVERAGES: DimensionAverages = {
  relevance: null,
  specificity: null,
  pedagogical: null,
  personalization: null,
  actionability: null,
  tone: null,
};

/**
 * Reads the JSON object carried by a stream event.
 *
 * Returns `null` when the event has no string payload (e.g. a native
 * connection error), when the payload is not valid JSON, or when it does not
 * decode to an object. The decoded shape is trusted, not validated.
 */
function parseEventData<T>(event: Event): T | null {
  const raw: unknown = (event as MessageEvent<unknown>).data;
  if (typeof raw !== 'string') {
    return null;
  }
  try {
    const parsed: unknown = JSON.parse(raw);
    return typeof parsed === 'object' && parsed !== null ? (parsed as T) : null;
  } catch {
    return null;
  }
}

/** Test count, pass count and mean latency for the results of one profile. */
function summarizeProfile(
  results: ResultEntry[],
  profile: string,
): { testCount: number; successCount: number; avgLatency: number } {
  const rows = results.filter((r) => r.profile === profile);
  const totalLatency = rows.reduce((sum, r) => sum + r.latencyMs, 0);
  return {
    testCount: rows.length,
    successCount: rows.filter((r) => r.passed).length,
    // Avoid 0/0 before any result for this profile has arrived.
    avgLatency: rows.length > 0 ? totalLatency / rows.length : 0,
  };
}

/** Text colour class for a log level; unrecognised levels use the default grey. */
function logLevelClass(level: string): string {
  switch (level) {
    case 'error':
      return 'text-red-400';
    case 'warning':
      return 'text-yellow-400';
    case 'success':
      return 'text-green-400';
    default:
      return 'text-gray-500';
  }
}

/**
 * Controller for the Recognition A/B comparison view.
 *
 * Opens an EventSource on the recognition A/B endpoint, folds the streamed
 * `start` / `progress` / `log` / `result` / `complete` / `error` events into
 * local state, and renders the side-by-side cards, recognition metrics, delta
 * table and log panel.
 *
 * @param onRunComplete Optional callback invoked with the run id from the
 *   `complete` event once the stream has been closed.
 */
export const RecognitionABMode: React.FC<RecognitionABModeProps> = ({ onRunComplete }) => {
  const [isRunning, setIsRunning] = useState(false);
  const [progress, setProgress] = useState({ current: 0, total: 0, percentage: 0 });
  const [currentScenario, setCurrentScenario] = useState('');
  const [logs, setLogs] = useState<LogEntry[]>([]);
  const [results, setResults] = useState<ResultEntry[]>([]);

  // Final results state
  const [dimensionAverages, setDimensionAverages] = useState<{
    baseline: DimensionAverages;
    recognition: DimensionAverages;
  } | null>(null);
  const [deltaAnalysis, setDeltaAnalysis] = useState<DeltaEntry[]>([]);
  const [overallScores, setOverallScores] = useState<OverallScores | null>(null);
  const [recognitionMetrics, setRecognitionMetrics] = useState<RecognitionMetrics | null>(null);
  const [runId, setRunId] = useState<string | null>(null);
  const [error, setError] = useState<string | null>(null);

  const eventSourceRef = useRef<EventSource | null>(null);
  const logsEndRef = useRef<HTMLDivElement>(null);

  // Close any open stream when the component unmounts so the connection does
  // not outlive the view and no handler updates state afterwards.
  React.useEffect(() => {
    return () => {
      eventSourceRef.current?.close();
      eventSourceRef.current = null;
    };
  }, []);

  const addLog = useCallback((entry: LogEntry) => {
    setLogs((prev) => {
      const next = [...prev, entry];
      // Keep only the newest MAX_LOG_ENTRIES entries, including the new one.
      return next.length > MAX_LOG_ENTRIES ? next.slice(next.length - MAX_LOG_ENTRIES) : next;
    });
  }, []);

  const runComparison = useCallback(() => {
    // Reset state
    setIsRunning(true);
    setProgress({ current: 0, total: 0, percentage: 0 });
    setCurrentScenario('');
    setLogs([]);
    setResults([]);
    setDimensionAverages(null);
    setDeltaAnalysis([]);
    setOverallScores(null);
    setRecognitionMetrics(null);
    setRunId(null);
    setError(null);

    // Close existing connection
    eventSourceRef.current?.close();

    const es = new EventSource(RECOGNITION_AB_STREAM_URL);
    eventSourceRef.current = es;

    // Ends this run's stream. The ref is only cleared when it still points at
    // this connection, so a stale handler cannot drop a newer one.
    const stopStream = () => {
      es.close();
      if (eventSourceRef.current === es) {
        eventSourceRef.current = null;
      }
      setIsRunning(false);
    };

    const reportMalformed = (eventName: string) => {
      addLog({ message: `Ignored malformed '${eventName}' event`, level: 'warning' });
    };

    es.addEventListener('start', (event) => {
      const data = parseEventData<{ totalTests: number; scenarioCount: number }>(event);
      if (!data) {
        reportMalformed('start');
        return;
      }
      setProgress({ current: 0, total: data.totalTests, percentage: 0 });
      addLog({ message: `Starting Recognition A/B test: ${data.scenarioCount} scenarios`, level: 'info' });
    });

    es.addEventListener('progress', (event) => {
      const data = parseEventData<{
        current: number;
        total: number;
        percentage: number;
        scenario: string;
      }>(event);
      if (!data) {
        reportMalformed('progress');
        return;
      }
      setProgress({ current: data.current, total: data.total, percentage: data.percentage });
      setCurrentScenario(data.scenario);
    });

    es.addEventListener('log', (event) => {
      const data = parseEventData<LogEntry>(event);
      if (!data) {
        reportMalformed('log');
        return;
      }
      addLog({ message: data.message, level: data.level, timestamp: data.timestamp });
    });

    es.addEventListener('result', (event) => {
      const data = parseEventData<ResultEntry>(event);
      if (!data) {
        reportMalformed('result');
        return;
      }
      setResults((prev) => [...prev, data]);
    });

    es.addEventListener('complete', (event) => {
      const data = parseEventData<{
        dimensionAverages?: { baseline: DimensionAverages; recognition: DimensionAverages } | null;
        deltaAnalysis?: DeltaEntry[] | null;
        overallScores?: OverallScores | null;
        recognitionMetrics?: RecognitionMetrics | null;
        runId: string;
      }>(event);

      if (!data) {
        // Without a usable final payload the run cannot be presented; surface
        // it as an error instead of leaving the view stuck in "running".
        setError('Malformed completion payload');
        addLog({ message: 'Error: Malformed completion payload', level: 'error' });
        stopStream();
        return;
      }

      // Missing sections fall back to their empty state so rendering code that
      // reads `.length` or optional-chains these values stays safe.
      setDimensionAverages(data.dimensionAverages ?? null);
      setDeltaAnalysis(data.deltaAnalysis ?? []);
      setOverallScores(data.overallScores ?? null);
      setRecognitionMetrics(data.recognitionMetrics ?? null);
      setRunId(data.runId);

      stopStream();

      onRunComplete?.(data.runId);
    });

    // Handles both server-sent `error` events (JSON payload with an `error`
    // string) and native connection errors (no payload). A separate
    // `es.onerror` handler is unnecessary because this listener receives the
    // same events and always closes the stream.
    // NOTE(review): closing here also disables EventSource's automatic
    // reconnect; presumably a reconnect would start a new server-side run -
    // confirm against the route implementation.
    es.addEventListener('error', (event) => {
      const data = parseEventData<{ error?: unknown }>(event);
      if (data && typeof data.error === 'string') {
        setError(data.error);
        addLog({ message: `Error: ${data.error}`, level: 'error' });
      } else {
        setError('Connection error');
        addLog({ message: 'Connection error', level: 'error' });
      }
      stopStream();
    });
  }, [addLog, onRunComplete]);

  const cancelRun = useCallback(() => {
    if (eventSourceRef.current) {
      eventSourceRef.current.close();
      eventSourceRef.current = null;
    }
    setIsRunning(false);
    addLog({ message: 'Test cancelled by user', level: 'warning' });
  }, [addLog]);

  // Calculate stats for each profile
  const baselineStats = summarizeProfile(results, 'baseline');
  const recognitionStats = summarizeProfile(results, 'recognition');

  return (
    <div className="space-y-4">
      {/* Header */}
      <div className="bg-gray-900/60 backdrop-blur-sm border border-white/5 rounded-xl p-4">
        <div className="flex items-center justify-between mb-3">
          <div className="flex items-center gap-3">
            <span className="text-lg">⚖️</span>
            <div>
              <h3 className="text-sm font-medium text-white">Recognition A/B Comparison</h3>
              <p className="text-[10px] text-gray-400">
                baseline (control) vs recognition (treatment)
              </p>
            </div>
          </div>

          {isRunning ? (
            <button
              type="button"
              onClick={cancelRun}
              className="px-3 py-1.5 text-xs bg-red-500/20 text-red-400 rounded-lg border border-red-500/30 hover:bg-red-500/30 transition-colors active:scale-[0.98]"
            >
              Cancel
            </button>
          ) : (
            <button
              type="button"
              onClick={runComparison}
              className="px-3 py-1.5 text-xs bg-yellow-500/20 text-yellow-400 rounded-lg border border-yellow-500/30 hover:bg-yellow-500/30 transition-colors active:scale-[0.98]"
            >
              Run A/B Test
            </button>
          )}
        </div>

        {/* Progress bar */}
        {isRunning && (
          <div className="space-y-2">
            <div className="flex items-center justify-between text-[10px]">
              <span className="text-gray-400">{currentScenario}</span>
              <span className="text-gray-500">
                {progress.current}/{progress.total} ({progress.percentage}%)
              </span>
            </div>
            <div className="h-1.5 bg-gray-800 rounded-full overflow-hidden">
              <div
                className="h-full bg-gradient-to-r from-yellow-500 to-green-500 rounded-full transition-all duration-300"
                style={{ width: `${progress.percentage}%` }}
              />
            </div>
          </div>
        )}

        {/* Error display */}
        {error && (
          <div className="mt-3 p-2 bg-red-500/10 border border-red-500/20 rounded-lg text-xs text-red-400">
            {error}
          </div>
        )}
      </div>

      {/* Results section */}
      {(dimensionAverages || isRunning) && (
        <>
          {/* Overall winner banner */}
          {overallScores?.winner && (
            <div className="bg-gradient-to-r from-yellow-500/10 to-green-500/10 border border-yellow-500/20 rounded-xl p-4 text-center">
              <div className="text-xs text-gray-400 mb-2">Overall Winner</div>
              <WinnerIndicator
                winner={overallScores.winner}
                significance={overallScores.significance}
                size="lg"
              />
              {overallScores.delta != null && (
                <div className="mt-2 text-xs text-gray-500">
                  {overallScores.delta > 0 ? '+' : ''}
                  {overallScores.delta.toFixed(1)} points difference
                </div>
              )}
            </div>
          )}

          {/* Profile comparison cards */}
          <div className="grid grid-cols-2 gap-3">
            <ProfileComparisonCard
              profile="baseline"
              overallScore={overallScores?.baseline ?? null}
              delta={null}
              dimensionAverages={dimensionAverages?.baseline ?? EMPTY_DIMENSION_AVERAGES}
              testCount={baselineStats.testCount}
              successCount={baselineStats.successCount}
              avgLatency={baselineStats.avgLatency}
              isWinner={overallScores?.winner === 'baseline'}
            />
            <ProfileComparisonCard
              profile="recognition"
              overallScore={overallScores?.recognition ?? null}
              delta={overallScores?.delta ?? null}
              dimensionAverages={dimensionAverages?.recognition ?? EMPTY_DIMENSION_AVERAGES}
              testCount={recognitionStats.testCount}
              successCount={recognitionStats.successCount}
              avgLatency={recognitionStats.avgLatency}
              isWinner={overallScores?.winner === 'recognition'}
            />
          </div>

          {/* Recognition metrics (only for recognition profile) */}
          {recognitionMetrics && (
            <RecognitionMetricsPanel
              momentsGenerated={recognitionMetrics.momentsGenerated}
              avgDialecticalDepth={recognitionMetrics.avgDialecticalDepth}
              synthesisStrategies={recognitionMetrics.synthesisStrategies}
            />
          )}

          {/* Delta analysis table */}
          {deltaAnalysis.length > 0 && <DeltaAnalysisTable deltaAnalysis={deltaAnalysis} />}
        </>
      )}

      {/* Logs panel (collapsible) */}
      {logs.length > 0 && (
        <details className="bg-gray-900/60 backdrop-blur-sm border border-white/5 rounded-xl">
          <summary className="px-4 py-3 text-xs text-gray-400 cursor-pointer hover:text-gray-300">
            Logs ({logs.length})
          </summary>
          <div className="px-4 pb-4 max-h-40 overflow-y-auto">
            <div className="space-y-1 font-mono text-[10px]">
              {logs.slice(-VISIBLE_LOG_ENTRIES).map((log, i) => (
                <div key={i} className={logLevelClass(log.level)}>
                  {log.message}
                </div>
              ))}
              <div ref={logsEndRef} />
            </div>
          </div>
        </details>
      )}

      {/* Run ID link */}
      {runId && !isRunning && (
        <div className="text-center text-[10px] text-gray-500">
          Run ID: <span className="text-gray-400 font-mono">{runId}</span>
        </div>
      )}
    </div>
  );
};
|
|
384
|
+
|
|
385
|
+
export default RecognitionABMode;
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RecognitionMetricsPanel Component
|
|
3
|
+
*
|
|
4
|
+
* Displays recognition-specific metrics that only the recognition profile generates:
|
|
5
|
+
* - Moments generated (recognition events recorded)
|
|
6
|
+
* - Dialectical depth (quality of dialectical engagement)
|
|
7
|
+
* - Synthesis strategies distribution
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import React from 'react';
|
|
11
|
+
|
|
12
|
+
/** Counts per synthesis strategy. */
interface SynthesisStrategies {
  ghost_dominates: number;
  learner_dominates: number;
  dialectical_synthesis: number;
}

/** Props accepted by RecognitionMetricsPanel. */
interface RecognitionMetricsPanelProps {
  /** Number shown in the "Moments" tile. */
  momentsGenerated: number;
  /** Shown as a percentage after multiplying by 100. */
  avgDialecticalDepth: number;
  /** Strategy counts used for the "Synthesis Rate" tile and the breakdown bars. */
  synthesisStrategies: SynthesisStrategies;
}

/** Share of `count` in `total` as a percentage; 0 when `total` is not positive. */
function percentOf(count: number, total: number): number {
  return total > 0 ? (count / total) * 100 : 0;
}

/**
 * Panel of recognition-specific metrics: a row of three headline tiles
 * (moments, dialectical depth, synthesis rate), a per-strategy bar breakdown
 * when at least one strategy was counted, and an empty-state hint when both
 * the strategy total and the moment count are zero.
 */
export const RecognitionMetricsPanel: React.FC<RecognitionMetricsPanelProps> = ({
  momentsGenerated,
  avgDialecticalDepth,
  synthesisStrategies,
}) => {
  const { ghost_dominates, learner_dominates, dialectical_synthesis } = synthesisStrategies;
  const totalStrategies = ghost_dominates + learner_dominates + dialectical_synthesis;
  const dialecticalPercent = percentOf(dialectical_synthesis, totalStrategies);

  // A missing or non-numeric depth would otherwise be displayed as "NaN%".
  const depthLabel = Number.isFinite(avgDialecticalDepth)
    ? `${(avgDialecticalDepth * 100).toFixed(0)}%`
    : '—';

  // One entry per bar, in display order. Class names are spelled out in full
  // rather than assembled from fragments so they stay greppable.
  const strategyRows = [
    {
      key: 'dialectical_synthesis',
      label: 'Dialectical',
      count: dialectical_synthesis,
      barClass: 'h-full bg-gradient-to-r from-yellow-500 to-green-500 rounded-full',
      countClass: 'w-8 text-[10px] text-green-400 text-right',
    },
    {
      key: 'learner_dominates',
      label: 'Learner',
      count: learner_dominates,
      barClass: 'h-full bg-blue-500 rounded-full',
      countClass: 'w-8 text-[10px] text-blue-400 text-right',
    },
    {
      key: 'ghost_dominates',
      label: 'Ghost',
      count: ghost_dominates,
      barClass: 'h-full bg-red-500 rounded-full',
      countClass: 'w-8 text-[10px] text-red-400 text-right',
    },
  ];

  return (
    <div className="bg-gray-900/60 backdrop-blur-sm border border-yellow-500/20 rounded-xl p-4">
      <div className="flex items-center gap-2 mb-4">
        <span className="text-yellow-400">⚡</span>
        <span className="text-xs text-yellow-400 font-medium">Recognition Metrics</span>
        <span className="text-[10px] text-gray-500">(recognition profile only)</span>
      </div>

      {/* Key metrics row */}
      <div className="grid grid-cols-3 gap-3 mb-4">
        {/* Moments */}
        <div className="text-center">
          <div className="text-2xl font-bold text-yellow-400">{momentsGenerated}</div>
          <div className="text-[10px] text-gray-400">Moments</div>
        </div>

        {/* Dialectical Depth */}
        <div className="text-center">
          <div className="text-2xl font-bold text-green-400">{depthLabel}</div>
          <div className="text-[10px] text-gray-400">Dialectical Depth</div>
        </div>

        {/* Synthesis Rate */}
        <div className="text-center">
          <div className="text-2xl font-bold text-purple-400">{dialecticalPercent.toFixed(0)}%</div>
          <div className="text-[10px] text-gray-400">Synthesis Rate</div>
        </div>
      </div>

      {/* Synthesis strategy breakdown */}
      {totalStrategies > 0 && (
        <div className="space-y-2">
          <div className="text-[10px] text-gray-500 mb-2">Synthesis Strategies</div>

          {strategyRows.map((row) => (
            <div key={row.key} className="flex items-center gap-2">
              <div className="w-20 text-[10px] text-gray-400">{row.label}</div>
              <div className="flex-1 h-2 bg-gray-800 rounded-full overflow-hidden">
                <div
                  className={row.barClass}
                  style={{ width: `${percentOf(row.count, totalStrategies)}%` }}
                />
              </div>
              <div className={row.countClass}>{row.count}</div>
            </div>
          ))}
        </div>
      )}

      {totalStrategies === 0 && momentsGenerated === 0 && (
        <div className="text-center py-2">
          <div className="text-sm text-gray-500">No recognition events recorded</div>
          <div className="text-[10px] text-gray-600 mt-1">
            Run test to generate recognition metrics
          </div>
        </div>
      )}
    </div>
  );
};
|
|
134
|
+
|
|
135
|
+
export default RecognitionMetricsPanel;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WinnerIndicator Component
|
|
3
|
+
*
|
|
4
|
+
* Visual badge showing which profile won a comparison.
|
|
5
|
+
* Uses color coding:
|
|
6
|
+
* - Gold/yellow gradient for winner
|
|
7
|
+
* - Gray for tie/no winner
|
|
8
|
+
* - Significance stars (* = >5%, ** = >10% improvement)
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import React from 'react';
|
|
12
|
+
|
|
13
|
+
/** Props accepted by WinnerIndicator. */
interface WinnerIndicatorProps {
  /** Winning profile; `null` renders the neutral "Tie" badge. */
  winner: 'baseline' | 'recognition' | null;
  /** Marker rendered after the label on a winner badge; omitted when empty. */
  significance?: '' | '*' | '**';
  /** Text size of the badge; defaults to 'md'. */
  size?: 'sm' | 'md' | 'lg';
  /** Whether to render the text label next to the icon; defaults to true. */
  showLabel?: boolean;
}

/** Classes shared by every badge variant. */
const BADGE_BASE_CLASSES = 'inline-flex items-center gap-1 px-2 py-0.5 rounded-full';

/** Text-size class for a badge size; anything other than 'sm' or 'lg' uses the medium size. */
function badgeSizeClass(size: WinnerIndicatorProps['size']): string {
  switch (size) {
    case 'sm':
      return 'text-[10px]';
    case 'lg':
      return 'text-sm';
    default:
      return 'text-xs';
  }
}

/**
 * Badge showing which profile won a comparison.
 *
 * Renders a grey "~ Tie" badge when `winner` is null; otherwise a coloured
 * badge with an icon, an optional label and an optional significance marker.
 * Class names are joined with single spaces so the DOM `class` attribute does
 * not carry the newlines and indentation of a multi-line template literal.
 */
export const WinnerIndicator: React.FC<WinnerIndicatorProps> = ({
  winner,
  significance = '',
  size = 'md',
  showLabel = true,
}) => {
  const sizeClass = badgeSizeClass(size);

  if (!winner) {
    const tieClasses = [
      BADGE_BASE_CLASSES,
      'bg-gray-700/50 text-gray-400 border border-gray-600/30',
      sizeClass,
    ].join(' ');

    return (
      <span className={tieClasses}>
        <span className="opacity-50">~</span>
        {showLabel && <span>Tie</span>}
      </span>
    );
  }

  const isRecognition = winner === 'recognition';

  // Winner styling
  const winnerStyles = isRecognition
    ? 'bg-gradient-to-r from-yellow-500/20 to-green-500/20 text-yellow-400 border-yellow-500/30'
    : 'bg-gradient-to-r from-blue-500/20 to-blue-400/20 text-blue-400 border-blue-500/30';

  const icon = isRecognition ? '🏆' : '🎯';
  const label = isRecognition ? 'Recognition' : 'Baseline';

  const winnerClasses = [BADGE_BASE_CLASSES, 'border', winnerStyles, sizeClass].join(' ');

  return (
    <span className={winnerClasses}>
      <span>{icon}</span>
      {showLabel && <span className="font-medium">{label}</span>}
      {significance && (
        <span className="text-yellow-300 font-bold">{significance}</span>
      )}
    </span>
  );
};
|
|
63
|
+
|
|
64
|
+
export default WinnerIndicator;
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export { RecognitionABMode } from './RecognitionABMode';
|
|
2
|
+
export { ProfileComparisonCard } from './ProfileComparisonCard';
|
|
3
|
+
export { DeltaAnalysisTable } from './DeltaAnalysisTable';
|
|
4
|
+
export { RecognitionMetricsPanel } from './RecognitionMetricsPanel';
|
|
5
|
+
export { WinnerIndicator } from './WinnerIndicator';
|