@machinespirits/eval 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -9
- package/config/eval-settings.yaml +3 -3
- package/config/paper-manifest.json +486 -0
- package/config/providers.yaml +9 -6
- package/config/tutor-agents.yaml +2261 -0
- package/content/README.md +23 -0
- package/content/courses/479/course.md +53 -0
- package/content/courses/479/lecture-1.md +361 -0
- package/content/courses/479/lecture-2.md +360 -0
- package/content/courses/479/lecture-3.md +655 -0
- package/content/courses/479/lecture-4.md +530 -0
- package/content/courses/479/lecture-5.md +326 -0
- package/content/courses/479/lecture-6.md +346 -0
- package/content/courses/479/lecture-7.md +326 -0
- package/content/courses/479/lecture-8.md +273 -0
- package/content/courses/479/roadmap-slides.md +656 -0
- package/content/manifest.yaml +8 -0
- package/docs/research/build.sh +44 -20
- package/docs/research/figures/figure10.png +0 -0
- package/docs/research/figures/figure11.png +0 -0
- package/docs/research/figures/figure3.png +0 -0
- package/docs/research/figures/figure4.png +0 -0
- package/docs/research/figures/figure5.png +0 -0
- package/docs/research/figures/figure6.png +0 -0
- package/docs/research/figures/figure7.png +0 -0
- package/docs/research/figures/figure8.png +0 -0
- package/docs/research/figures/figure9.png +0 -0
- package/docs/research/header.tex +23 -2
- package/docs/research/paper-full.md +941 -285
- package/docs/research/paper-short.md +216 -585
- package/docs/research/references.bib +132 -0
- package/docs/research/slides-header.tex +188 -0
- package/docs/research/slides-pptx.md +363 -0
- package/docs/research/slides.md +531 -0
- package/docs/research/style-reference-pptx.py +199 -0
- package/package.json +6 -5
- package/scripts/analyze-eval-results.js +69 -17
- package/scripts/analyze-mechanism-traces.js +763 -0
- package/scripts/analyze-modulation-learning.js +498 -0
- package/scripts/analyze-prosthesis.js +144 -0
- package/scripts/analyze-run.js +264 -79
- package/scripts/assess-transcripts.js +853 -0
- package/scripts/browse-transcripts.js +854 -0
- package/scripts/check-parse-failures.js +73 -0
- package/scripts/code-dialectical-modulation.js +1320 -0
- package/scripts/download-data.sh +55 -0
- package/scripts/eval-cli.js +106 -18
- package/scripts/generate-paper-figures.js +663 -0
- package/scripts/generate-paper-figures.py +577 -76
- package/scripts/generate-paper-tables.js +299 -0
- package/scripts/qualitative-analysis-ai.js +3 -3
- package/scripts/render-sequence-diagram.js +694 -0
- package/scripts/test-latency.js +210 -0
- package/scripts/test-rate-limit.js +95 -0
- package/scripts/test-token-budget.js +332 -0
- package/scripts/validate-paper-manifest.js +670 -0
- package/services/__tests__/evalConfigLoader.test.js +2 -2
- package/services/__tests__/learnerRubricEvaluator.test.js +361 -0
- package/services/__tests__/learnerTutorInteractionEngine.test.js +326 -0
- package/services/evaluationRunner.js +975 -98
- package/services/evaluationStore.js +12 -4
- package/services/learnerTutorInteractionEngine.js +27 -2
- package/services/mockProvider.js +133 -0
- package/services/promptRewriter.js +1471 -5
- package/services/rubricEvaluator.js +55 -2
- package/services/transcriptFormatter.js +675 -0
- package/docs/EVALUATION-VARIABLES.md +0 -589
- package/docs/REPLICATION-PLAN.md +0 -577
- package/scripts/analyze-run.mjs +0 -282
- package/scripts/compare-runs.js +0 -44
- package/scripts/compare-suggestions.js +0 -80
- package/scripts/dig-into-run.js +0 -158
- package/scripts/show-failed-suggestions.js +0 -64
- /package/scripts/{check-run.mjs → check-run.js} +0 -0
|
@@ -0,0 +1,694 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Render Sequence Diagram
|
|
5
|
+
*
|
|
6
|
+
* Generates standalone HTML files with SVG sequence diagrams showing
|
|
7
|
+
* the message flow between tutor ego, tutor superego, learner ego,
|
|
8
|
+
* and learner superego. Includes judge adjudication panel.
|
|
9
|
+
*
|
|
10
|
+
* Usage:
|
|
11
|
+
* node scripts/render-sequence-diagram.js <runId> [options]
|
|
12
|
+
*
|
|
13
|
+
* Options:
|
|
14
|
+
* --scenario <id> Filter by scenario
|
|
15
|
+
* --profile <name> Filter by profile name
|
|
16
|
+
* --dialogue <id> Render a specific dialogue by ID
|
|
17
|
+
* --limit <N> Max number of diagrams to render
|
|
18
|
+
* --output <dir> Output directory (default: exports/)
|
|
19
|
+
* --open Open first diagram in browser after rendering
|
|
20
|
+
*
|
|
21
|
+
* Examples:
|
|
22
|
+
* node scripts/render-sequence-diagram.js eval-2026-02-07-b6d75e87 --dialogue dialogue-1770448315802-zmvmm0
|
|
23
|
+
* node scripts/render-sequence-diagram.js eval-2026-02-07-b6d75e87 --profile cell_8_recog_multi_psycho --open
|
|
24
|
+
* node scripts/render-sequence-diagram.js eval-2026-02-07-b6d75e87 --scenario mutual_transformation_journey --limit 4
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import fs from 'fs';
|
|
28
|
+
import path from 'path';
|
|
29
|
+
import { execSync } from 'child_process';
|
|
30
|
+
import Database from 'better-sqlite3';
|
|
31
|
+
import YAML from 'yaml';
|
|
32
|
+
|
|
33
|
+
// Filesystem locations, each resolved relative to the directory above this script.
const [DB_PATH, LOGS_DIR, DEFAULT_OUTPUT] = [
  ['data', 'evaluations.db'],
  ['logs', 'tutor-dialogues'],
  ['exports'],
].map((segments) => path.join(import.meta.dirname, '..', ...segments));

// ── CLI parsing ──────────────────────────────────────────────────────────────

const args = process.argv.slice(2);

// Print usage and stop when invoked with no arguments or with an explicit help switch.
if (args.length === 0 || ['--help', '-h'].some((flag) => args.includes(flag))) {
  const usageLines = [
    '',
    'Usage: render-sequence-diagram.js <runId> [options]',
    '',
    'Options:',
    '--scenario <id> Filter by scenario',
    '--profile <name> Filter by profile name',
    '--dialogue <id> Render a specific dialogue by ID',
    '--limit <N> Max number of diagrams (default: all)',
    '--output <dir> Output directory (default: exports/)',
    '--open Open first diagram in browser',
    '',
  ];
  console.log(usageLines.join('\n'));
  process.exit(0);
}
|
|
54
|
+
|
|
55
|
+
/**
 * Look up the value that follows `--<name>` on the command line.
 * Only the first occurrence of the flag is considered.
 *
 * @param {string} name - Option name without the leading dashes.
 * @returns {string|null} The argument immediately after the flag, or null
 *   when the flag is absent or is the last argument.
 */
function getOption(name) {
  const flagPos = args.indexOf(`--${name}`);
  if (flagPos < 0) return null;

  const valuePos = flagPos + 1;
  if (valuePos >= args.length) return null;

  return args[valuePos];
}
|
|
59
|
+
/**
 * Report whether the bare switch `--<name>` appears anywhere on the command line.
 *
 * @param {string} name - Switch name without the leading dashes.
 * @returns {boolean} True when the switch is present.
 */
function getFlag(name) {
  const flag = `--${name}`;
  return args.some((arg) => arg === flag);
}
|
|
60
|
+
|
|
61
|
+
// Positional run ID: the first argument that is neither a `--flag` nor the
// value consumed by a value-taking option. Matching by position (instead of
// comparing against option values) keeps a run ID that happens to equal an
// option value from being discarded.
const runId = args.find((arg, idx) =>
  !arg.startsWith('--')
  && !['--scenario', '--profile', '--dialogue', '--limit', '--output'].includes(args[idx - 1]));
const scenarioFilter = getOption('scenario');
const profileFilter = getOption('profile');
const dialogueFilter = getOption('dialogue');
// Parse with an explicit radix. Anything that is not a positive integer means
// "no limit" (as before), but the user is now told the value was ignored.
const limit = (() => {
  const rawLimit = getOption('limit');
  if (rawLimit === null) return null;
  const parsed = Number.parseInt(rawLimit, 10);
  if (Number.isInteger(parsed) && parsed > 0) return parsed;
  console.warn(`Warning: ignoring invalid --limit value "${rawLimit}"`);
  return null;
})();
const outputDir = getOption('output') || DEFAULT_OUTPUT;
const shouldOpen = getFlag('open');

if (!runId) {
  console.error('Error: run ID required');
  process.exit(1);
}
|
|
73
|
+
|
|
74
|
+
// ── DB queries ───────────────────────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
const db = new Database(DB_PATH, { readonly: true });

// Base statement: every result row of the run that is tied to a dialogue.
let query = [
  'SELECT id, profile_name, scenario_id, dialogue_id, overall_score, judge_model,',
  'ego_model, superego_model,',
  'score_relevance, score_specificity, score_pedagogical, score_personalization,',
  'score_actionability, score_tone, scores_with_reasoning, qualitative_assessment, qualitative_model',
  'FROM evaluation_results',
  'WHERE run_id = ? AND dialogue_id IS NOT NULL',
].join('\n');
const params = [runId];

// Optional narrowing: each active filter appends one clause and its bound value,
// in the order dialogue → scenario → profile.
[
  [dialogueFilter, ' AND dialogue_id = ?', dialogueFilter],
  [scenarioFilter, ' AND scenario_id LIKE ?', `%${scenarioFilter}%`],
  [profileFilter, ' AND profile_name LIKE ?', `%${profileFilter}%`],
].forEach(([active, clause, boundValue]) => {
  if (!active) return;
  query += clause;
  params.push(boundValue);
});

// Highest-scoring dialogues first; cap the row count only when a limit was given.
query += ' ORDER BY overall_score DESC';
if (limit) {
  query += ' LIMIT ?';
  params.push(limit);
}

const results = db.prepare(query).all(...params);

if (results.length === 0) {
  console.log('No multi-turn dialogues found matching filters.');
  process.exit(0);
}

console.log(`Found ${results.length} dialogue(s) to render.`);
|
|
101
|
+
|
|
102
|
+
// ── Trace → sequence steps ───────────────────────────────────────────────────
|
|
103
|
+
|
|
104
|
+
/**
 * Reduce a model identifier to a short display name.
 *
 * Drops a leading "openrouter." prefix, everything up to and including the
 * last "/", and everything from the first ":" onward, e.g.
 * "openrouter.kimi-k2.5" → "kimi-k2.5", "moonshotai/kimi-k2.5" → "kimi-k2.5".
 *
 * @param {*} m - Model identifier; any falsy value yields "?".
 * @returns {string} The shortened name, or "?" when no model was given.
 */
function shortModel(m) {
  if (!m) return '?';

  const withoutPrefix = String(m).replace(/^openrouter\./, '');
  const lastSegment = withoutPrefix.slice(withoutPrefix.lastIndexOf('/') + 1);
  const colonAt = lastSegment.indexOf(':');
  return colonAt === -1 ? lastSegment : lastSegment.slice(0, colonAt);
}
|
|
109
|
+
|
|
110
|
+
/**
 * Pull the learner's message text out of a trace entry's `rawContext`.
 *
 * Two layouts are tried in order: a "Learner Message(s):" section (read up to
 * a following "\n</" or the end of the text), then the first quoted
 * `- User: "..."` line under "Recent Chat History".
 *
 * @param {object} entry - Trace entry; only `rawContext` is read.
 * @returns {string|null} The trimmed message, or null when neither layout matches.
 */
function extractLearnerQuery(entry) {
  const raw = entry.rawContext || '';
  const patterns = [
    /Learner Messages?:\s*(.+?)(?:\n<\/|$)/s,
    /Recent Chat History\n-\s*User:\s*"(.+?)"/s,
  ];

  for (const pattern of patterns) {
    const hit = raw.match(pattern);
    if (hit) return hit[1].trim();
  }
  return null;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Return the leading portion of an entry's display text.
 *
 * @param {object} entry - Trace entry passed through to `fullContent`.
 * @param {number} [maxLen=90] - Maximum number of characters to keep.
 * @returns {string} At most `maxLen` characters from the start of the text.
 */
function snippet(entry, maxLen = 90) {
  const text = fullContent(entry);
  return text.substring(0, maxLen);
}
|
|
120
|
+
|
|
121
|
+
/**
 * Choose the text that represents a trace entry, by entry kind.
 *
 * Checked in this order:
 *  1. superego review   → `feedback`, else `verdict.feedback`
 *  2. entry with a non-empty `suggestions` list → each suggestion's
 *     `message`, `text` or `title`, joined by blank lines
 *  3. user context input → the extracted learner query, else "(scenario context)"
 *  4. user turn action   → `contextSummary`, else `detail`
 *  5. anything else      → `detail`, else `contextSummary`
 *
 * @param {object} entry - Trace entry.
 * @returns {string} The chosen text; empty string when nothing applies.
 */
function fullContent(entry) {
  const { agent, action } = entry;

  if (agent === 'superego' && action === 'review') {
    return entry.feedback || entry.verdict?.feedback || '';
  }

  if (entry.suggestions?.length > 0) {
    const parts = entry.suggestions.map((s) => s.message || s.text || s.title || '');
    return parts.join('\n\n');
  }

  if (agent === 'user') {
    if (action === 'context_input') {
      return extractLearnerQuery(entry) || '(scenario context)';
    }
    if (action === 'turn_action') {
      return entry.contextSummary || entry.detail || '';
    }
  }

  return entry.detail || entry.contextSummary || '';
}
|
|
136
|
+
|
|
137
|
+
/**
 * Convert a dialogue trace into the ordered list of arrows ("steps") drawn in
 * the sequence diagram and listed in the transcript.
 *
 * Each step has `from`/`to` actor ids (`learner_ego`, `learner_superego`,
 * `tutor_ego`, `tutor_superego`), a `label`, a short `detail`, the
 * `fullDetail` text, a `type` (`front`, `back` or `response`) and a `speaker`.
 * Tutor steps may also carry `latency`, `model` and `approved`.
 *
 * Mapping of trace entries:
 *  - `system`, `user/final_output`, `learner_synthesis` and
 *    `learner_ego_revision` entries produce no step.
 *  - `user/context_input` starts a new dialogue turn; only the first one emits
 *    an "Initial query" arrow.
 *  - ego `generate`/`revise`/`incorporate-feedback` becomes a Draft/Revised
 *    arrow to the tutor superego, unless it is a non-draft output with no
 *    superego review ahead, in which case it is the Response to the learner.
 *  - an approving superego review is followed by a Response arrow carrying
 *    the most recent ego output.
 *  - if a learner block begins while a Response is still owed, one is
 *    inserted first, again carrying the most recent ego output.
 *
 * @param {object[]} trace - Trace entries in chronological order.
 * @returns {object[]} Steps in display order.
 */
function traceToSteps(trace) {
  const steps = [];
  let dialogueTurn = 0;
  // True while the learner has spoken and no tutor Response arrow has been emitted yet.
  let needsResponseArrow = false;

  const egoOutputActions = ['generate', 'revise', 'incorporate-feedback'];

  // Entry predicates shared by the main loop and the scans below.
  const isEgoOutput = (entry) => entry.agent === 'ego' && egoOutputActions.includes(entry.action);
  const isSuperegoReview = (entry) => entry.agent === 'superego' && entry.action === 'review';
  const isContextInput = (entry) => entry.agent === 'user' && entry.action === 'context_input';
  const startsLearnerBlock = (entry) => entry.agent === 'learner_ego_initial';

  // Text of the closest tutor ego output before `index`, or '' when there is none.
  const lastEgoOutputBefore = (index) => {
    for (let j = index - 1; j >= 0; j--) {
      if (isEgoOutput(trace[j])) return fullContent(trace[j]);
    }
    return '';
  };

  // Whether a superego review occurs after `index` before the next learner block or context input.
  const superegoReviewFollows = (index) => {
    for (let j = index + 1; j < trace.length; j++) {
      const upcoming = trace[j];
      if (isSuperegoReview(upcoming)) return true;
      if (startsLearnerBlock(upcoming) || isContextInput(upcoming)) return false;
    }
    return false;
  };

  // Emit a Response arrow that carries only content (no latency/model) and mark it delivered.
  const pushBareResponse = (fullDetail) => {
    steps.push({
      from: 'tutor_ego', to: 'learner_ego', label: 'Response',
      detail: '', fullDetail, type: 'response', speaker: 'TUTOR EGO',
    });
    needsResponseArrow = false;
  };

  for (let i = 0; i < trace.length; i++) {
    const e = trace[i];
    const { agent, action } = e;

    // Entering a learner block while a Response is still owed: insert it first.
    if (startsLearnerBlock(e) && needsResponseArrow) {
      pushBareResponse(lastEgoOutputBefore(i));
    }

    if (agent === 'system') continue;
    if (agent === 'user' && action === 'final_output') continue;
    if (agent === 'learner_synthesis') continue;

    // Context input: starts a turn; only the very first one is drawn.
    if (isContextInput(e)) {
      dialogueTurn++;
      if (dialogueTurn === 1) {
        const full = extractLearnerQuery(e) || '(scenario prompt)';
        steps.push({
          from: 'learner_ego', to: 'tutor_ego',
          label: 'Initial query',
          detail: full.substring(0, 120),
          fullDetail: full,
          type: 'front', speaker: 'LEARNER',
        });
      }
      needsResponseArrow = true;
      continue;
    }

    // Tutor ego generate/revise
    if (isEgoOutput(e)) {
      const full = fullContent(e);
      const isDraft = action === 'generate';

      if (!isDraft && !superegoReviewFollows(i)) {
        // Final revision with no superego review — render as direct Response to learner
        steps.push({
          from: 'tutor_ego', to: 'learner_ego', label: 'Response',
          detail: '', fullDetail: full, type: 'response',
          latency: e.metrics?.latencyMs || null,
          speaker: 'TUTOR EGO', model: e.metrics?.model || null,
        });
        needsResponseArrow = false;
      } else {
        steps.push({
          from: 'tutor_ego', to: 'tutor_superego', label: isDraft ? 'Draft' : 'Revised',
          detail: snippet(e, 120), fullDetail: full, type: 'back',
          latency: e.metrics?.latencyMs || null,
          speaker: isDraft ? 'TUTOR EGO (draft)' : 'TUTOR EGO (revised)',
          model: e.metrics?.model || null,
        });
      }
      continue;
    }

    // Tutor superego review
    if (isSuperegoReview(e)) {
      const full = fullContent(e);
      if (e.approved) {
        steps.push({
          from: 'tutor_superego', to: 'tutor_ego', label: 'Approved \u2713',
          detail: snippet(e, 120), fullDetail: full, type: 'back', approved: true,
          latency: e.metrics?.latencyMs || null,
          speaker: 'SUPEREGO', model: e.metrics?.model || null,
        });
        // The approved ego output is what reaches the learner.
        pushBareResponse(lastEgoOutputBefore(i));
      } else {
        steps.push({
          from: 'tutor_superego', to: 'tutor_ego', label: 'Revise \u21BB',
          detail: snippet(e, 120), fullDetail: full, type: 'back', approved: false,
          latency: e.metrics?.latencyMs || null,
          speaker: 'SUPEREGO', model: e.metrics?.model || null,
        });
      }
      continue;
    }

    // Learner ego initial
    if (agent === 'learner_ego_initial' && action === 'deliberation') {
      steps.push({
        from: 'learner_ego', to: 'learner_superego', label: 'Reaction',
        detail: snippet(e, 120), fullDetail: fullContent(e), type: 'back',
        speaker: 'LEARNER EGO',
      });
      continue;
    }

    // Learner superego
    if (agent === 'learner_superego' && action === 'deliberation') {
      steps.push({
        from: 'learner_superego', to: 'learner_ego', label: 'Critique',
        detail: snippet(e, 120), fullDetail: fullContent(e), type: 'back',
        speaker: 'LEARNER SUPEREGO',
      });
      continue;
    }

    // Learner ego revision — skip, turn_action carries the final
    if (agent === 'learner_ego_revision') continue;

    // Turn action = learner's external message
    if (agent === 'user' && action === 'turn_action') {
      steps.push({
        from: 'learner_ego', to: 'tutor_ego',
        label: 'Turn ' + (dialogueTurn + 1),
        detail: snippet(e, 120), fullDetail: fullContent(e), type: 'front',
        speaker: 'LEARNER',
      });
      needsResponseArrow = true;
      continue;
    }
  }

  return steps;
}
|
|
299
|
+
|
|
300
|
+
// ── HTML template ────────────────────────────────────────────────────────────
|
|
301
|
+
|
|
302
|
+
/**
 * Escape text for safe interpolation into HTML/SVG markup and quoted attributes.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with their character references. The
 * ampersand is handled first so the references produced by later
 * replacements are not escaped a second time.
 *
 * @param {*} text - Value to escape; non-string values are stringified first.
 * @returns {string} The escaped text, or an empty string for any falsy input.
 */
function escapeHtml(text) {
  if (!text) return '';
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
|
|
306
|
+
|
|
307
|
+
/**
 * Build a standalone HTML page for one evaluated dialogue: an SVG sequence
 * diagram on the left, a click-synchronised transcript on the right, and a
 * collapsible judge panel when judge scores or a qualitative assessment exist.
 *
 * @param {object} result - Evaluation row. Fields read: `profile_name`,
 *   `scenario_id`, `overall_score`, `scores_with_reasoning` (JSON text) and
 *   `qualitative_assessment` (JSON text).
 * @param {object[]} steps - Diagram steps (`from`, `to`, `label`, `detail`,
 *   `fullDetail`, `type`, `speaker`, optional `latency`, `model`, `approved`).
 * @param {object[]} trace - Raw trace; accepted for interface compatibility
 *   but not read by this function.
 * @param {object} [meta={}] - Display metadata. Fields read: `egoModel`,
 *   `superegoModel`, `learnerEgoModel`, `learnerSuperegoModel`, `judgeModel`,
 *   `totalTurns`, `runId`, `dialogueId`.
 * @returns {string} The complete HTML document.
 */
function generateHtml(result, steps, trace, meta = {}) {
  const profile = result.profile_name;
  const scenario = result.scenario_id;
  const score = result.overall_score?.toFixed(1) || '--';
  const numericScore = parseFloat(score);
  const scoreColor = numericScore >= 90 ? '#1b5e20' : numericScore >= 70 ? '#e65100' : '#b71c1c';

  // Best-effort parse: missing, malformed or non-object JSON (e.g. "null")
  // simply leaves the corresponding panel section empty.
  const parseJsonObject = (raw) => {
    try {
      const parsed = JSON.parse(raw || '{}');
      return parsed && typeof parsed === 'object' ? parsed : {};
    } catch {
      return {};
    }
  };
  const judgeScores = parseJsonObject(result.scores_with_reasoning);
  const qualitative = parseJsonObject(result.qualitative_assessment);

  const isRecog = /recog/i.test(profile);
  const condLabel = isRecog ? 'Recognition' : 'Base';

  // ── Actors & SVG dimensions ──
  const actors = [
    { id: 'learner_superego', label: 'L.Superego', model: shortModel(meta.learnerSuperegoModel), color: '#fce4ec', textColor: '#c62828', stroke: '#ef5350' },
    { id: 'learner_ego', label: 'L.Ego', model: shortModel(meta.learnerEgoModel), color: '#f3e5f5', textColor: '#6a1b9a', stroke: '#ab47bc' },
    { id: 'tutor_ego', label: 'T.Ego', model: shortModel(meta.egoModel), color: '#e3f2fd', textColor: '#1565c0', stroke: '#42a5f5' },
    { id: 'tutor_superego', label: 'T.Superego', model: shortModel(meta.superegoModel), color: '#e8f5e9', textColor: '#2e7d32', stroke: '#66bb6a' },
  ];
  const colMap = {};
  actors.forEach((a, i) => { colMap[a.id] = i; });

  const colWidth = 140;
  const rowHeight = 38;
  const headerHeight = 56;
  const padding = 20;
  const svgWidth = colWidth * actors.length + padding * 2;
  const svgHeight = headerHeight + steps.length * rowHeight + 30;

  // ── Build SVG ──
  let svg = '';

  // Actor column headers with model subtitle (model names come from data, so escape them)
  actors.forEach((a, i) => {
    const x = padding + i * colWidth;
    const cx = x + colWidth / 2;
    svg += `<rect x="${x + 8}" y="4" width="${colWidth - 16}" height="40" rx="5" fill="${a.color}" stroke="${a.stroke}" stroke-width="1"/>`;
    svg += `<text x="${cx}" y="21" text-anchor="middle" font-size="11" font-weight="600" fill="${a.textColor}">${a.label}</text>`;
    svg += `<text x="${cx}" y="36" text-anchor="middle" font-size="8.5" fill="${a.textColor}" opacity="0.65">${escapeHtml(a.model)}</text>`;
    svg += `<line x1="${cx}" y1="${headerHeight}" x2="${cx}" y2="${svgHeight - 10}" stroke="#333" stroke-width="1" stroke-dasharray="3,3"/>`;
  });

  // Turn separator lines: one at the first step of each learner turn
  const turnBoundaries = [];
  let prevTurn = null;
  steps.forEach((s, i) => {
    if (s.label.startsWith('Turn ') || s.label === 'Initial query') {
      const num = s.label === 'Initial query' ? 1 : Number.parseInt(s.label.replace('Turn ', ''), 10);
      if (num !== prevTurn) { turnBoundaries.push({ index: i, turn: num }); prevTurn = num; }
    }
  });
  turnBoundaries.forEach(tb => {
    const y = headerHeight + tb.index * rowHeight;
    svg += `<line x1="${padding}" y1="${y}" x2="${svgWidth - padding}" y2="${y}" stroke="#444" stroke-width="0.5"/>`;
    svg += `<text x="${svgWidth - padding + 3}" y="${y + 12}" font-size="9" fill="#666" font-weight="600">T${tb.turn}</text>`;
  });

  // Arrow groups — each clickable
  steps.forEach((step, i) => {
    const fromCol = colMap[step.from];
    const toCol = colMap[step.to];
    if (fromCol === undefined || toCol === undefined) return;

    const fromX = padding + fromCol * colWidth + colWidth / 2;
    const toX = padding + toCol * colWidth + colWidth / 2;
    const y = headerHeight + i * rowHeight + rowHeight / 2;
    const isLR = fromX < toX;

    // Learner↔tutor arrows share one neutral colour and a heavier stroke;
    // internal deliberation arrows take the colour of the sending actor.
    const isFrontStage = step.type === 'front' || step.type === 'response';
    const color = isFrontStage ? '#78909c' : actors[fromCol].stroke;
    const sw = isFrontStage ? 2.2 : 1.2;
    const tipOff = isLR ? -6 : 6;

    // Invisible wider hit area for clicking
    svg += `<g data-step="${i}" class="arrow-group" style="cursor:pointer" onclick="highlight(${i})">`;
    svg += `<line x1="${fromX}" y1="${y}" x2="${toX}" y2="${y}" stroke="transparent" stroke-width="20"/>`;
    svg += `<line x1="${fromX}" y1="${y}" x2="${toX + tipOff}" y2="${y}" stroke="${color}" stroke-width="${sw}" class="arrow-line"/>`;
    if (isLR) {
      svg += `<polygon points="${toX - 6},${y - 3.5} ${toX},${y} ${toX - 6},${y + 3.5}" fill="${color}"/>`;
    } else {
      svg += `<polygon points="${toX + 6},${y - 3.5} ${toX},${y} ${toX + 6},${y + 3.5}" fill="${color}"/>`;
    }

    const labelX = (fromX + toX) / 2;
    let labelColor = '#bbb';
    if (step.approved === true) labelColor = '#66bb6a';
    if (step.approved === false) labelColor = '#ff7043';

    svg += `<text x="${labelX}" y="${y - 6}" text-anchor="middle" font-size="9.5" font-weight="500" fill="${labelColor}">${escapeHtml(step.label)}</text>`;
    if (step.latency) {
      const lat = step.latency < 1000 ? step.latency + 'ms' : (step.latency / 1000).toFixed(1) + 's';
      svg += `<text x="${labelX}" y="${y + 13}" text-anchor="middle" font-size="8" fill="#555">${lat}</text>`;
    }
    svg += `</g>`;
  });

  // ── Build transcript entries ──
  const speakerColors = {
    'TUTOR EGO': '#42a5f5', 'TUTOR EGO (draft)': '#42a5f5', 'TUTOR EGO (revised)': '#42a5f5',
    'SUPEREGO': '#66bb6a', 'LEARNER EGO': '#ab47bc', 'LEARNER SUPEREGO': '#ef5350',
    'LEARNER': '#78909c',
  };

  let transcriptHtml = '';
  steps.forEach((step, i) => {
    const speaker = step.speaker || step.label;
    const color = speakerColors[speaker] || '#999';
    const content = step.fullDetail || step.detail || '';
    if (!content && step.type === 'response') return; // skip empty response arrows without content

    let badge = '';
    if (step.approved === true) badge = '<span class="badge approved">APPROVED</span>';
    else if (step.approved === false) badge = '<span class="badge revise">REVISE</span>';

    const modelStr = step.model ? `<span class="entry-model">${escapeHtml(String(step.model).split('/').pop().split(':')[0])}</span>` : '';

    transcriptHtml += `<div class="entry" id="entry-${i}" data-step="${i}" onclick="highlight(${i})">
<div class="entry-speaker" style="color:${color}">${escapeHtml(speaker)} ${badge} ${modelStr}</div>
<div class="entry-content">${escapeHtml(content)}</div>
</div>\n`;
  });

  // ── Judge table ──
  let judgeRows = '';
  for (const [dim, data] of Object.entries(judgeScores)) {
    // Dimension names come from stored JSON, so escape them before display.
    const label = escapeHtml(dim.replace(/_/g, ' ').replace(/\b\w/g, c => c.toUpperCase()));
    // Tolerate null entries and non-numeric scores; both render as 0.
    const sv = Number(data?.score) || 0;
    const reasoning = escapeHtml(data?.reasoning || '');
    const barW = Math.min(100, Math.max(0, (sv / 5) * 100));
    const barC = sv >= 4 ? '#4caf50' : sv >= 3 ? '#ff9800' : '#f44336';
    judgeRows += `<tr>
<td class="jd">${label}</td><td class="js" style="color:${barC}">${sv}</td>
<td class="jb"><div class="bar-bg"><div class="bar-fg" style="width:${barW}%;background:${barC}"></div></div></td>
<td class="jr">${reasoning}</td></tr>`;
  }

  // ── Qualitative ──
  let qualHtml = '';
  const axes = [
    ['pedagogical_arc', 'Pedagogical Arc'], ['recognition_dynamics', 'Recognition Dynamics'],
    ['superego_effectiveness', 'Superego Effectiveness'], ['learner_trajectory', 'Learner Trajectory'],
    ['missed_opportunities', 'Missed Opportunities'], ['overall_narrative', 'Overall Narrative'],
  ];
  for (const [k, lab] of axes) {
    if (qualitative[k]) {
      qualHtml += `<div class="qual-item"><div class="qual-label">${lab}</div><div class="qual-text">${escapeHtml(qualitative[k])}</div></div>`;
    }
  }
  if (qualitative.key_turning_point) {
    const ktp = qualitative.key_turning_point;
    qualHtml += `<div class="qual-ktp"><div class="qual-label" style="color:#ffab40">Key Turning Point (Turn ${escapeHtml(String(ktp.turn || '?'))})</div><div class="qual-text">${escapeHtml(ktp.description || '')}</div></div>`;
  }
  if (Array.isArray(qualitative.tags) && qualitative.tags.length) {
    qualHtml += `<div class="qual-tags">${qualitative.tags.map(t => `<span class="tag">${escapeHtml(t)}</span>`).join('')}</div>`;
  }

  // When no superego model is recorded, show the ego model instead. The
  // fallback must happen before shortModel(), which maps a missing model to "?".
  const superegoDisplayModel = shortModel(meta.superegoModel || meta.egoModel);
  const turnsSuffix = meta.totalTurns ? ' · ' + escapeHtml(String(meta.totalTurns)) + ' turns' : '';

  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>${escapeHtml(scenario)} — ${escapeHtml(profile)}</title>
<style>
:root { --bg:#0d1117; --surface:#161b22; --border:#30363d; --text:#e6edf3; --muted:#8b949e; }
* { box-sizing:border-box; margin:0; padding:0; }
body { font-family:'SF Mono','Fira Code','JetBrains Mono',monospace; background:var(--bg); color:var(--text); height:100vh; overflow:hidden; display:flex; flex-direction:column; }

.top-bar { padding:12px 20px; border-bottom:1px solid var(--border); background:var(--surface); display:flex; align-items:center; justify-content:space-between; flex-shrink:0; gap:20px; }
.top-bar h1 { font-size:14px; font-weight:600; margin-bottom:4px; }
.top-bar .meta-grid { display:grid; grid-template-columns:auto auto auto; gap:2px 16px; font-size:11px; }
.top-bar .meta-label { color:var(--muted); }
.top-bar .meta-value { color:var(--text); font-weight:500; }
.top-bar .meta-id { font-size:9px; color:#555; margin-top:3px; }
.top-bar .score-badge { padding:3px 12px; border-radius:12px; font-weight:700; font-size:14px; color:#fff;
background:${scoreColor}; }

.split { display:flex; flex:1; overflow:hidden; }

/* Left: sequence diagram */
.left-pane { width:50%; overflow:auto; border-right:1px solid var(--border); padding:12px; flex-shrink:0; }
.left-pane svg { display:block; margin:0 auto; }
svg text { font-family:'SF Mono','Fira Code',monospace; }
.arrow-group:hover .arrow-line { stroke-width:3 !important; }
.arrow-group.active .arrow-line { stroke-width:3.5 !important; filter:drop-shadow(0 0 4px currentColor); }
.arrow-group.active text { font-weight:700 !important; }

.legend { display:flex; gap:14px; justify-content:center; padding:8px; font-size:10px; color:var(--muted); flex-shrink:0; }
.legend span { display:flex; align-items:center; gap:3px; }
.legend .sw { width:12px; height:3px; border-radius:2px; }

/* Right: transcript (scrolls independently) */
.right-pane { width:50%; overflow-y:auto; padding:12px 16px; }

.entry { padding:10px 12px; margin:4px 0; border-radius:6px; border:1px solid transparent; cursor:pointer; transition:all 0.15s; }
.entry:hover { background:rgba(255,255,255,0.03); border-color:var(--border); }
.entry.active { background:rgba(88,166,255,0.08); border-color:#58a6ff; box-shadow:0 0 12px rgba(88,166,255,0.15); }
.entry-speaker { font-size:10px; font-weight:700; text-transform:uppercase; letter-spacing:0.5px; margin-bottom:4px; display:flex; align-items:center; gap:6px; }
.entry-model { font-weight:400; color:var(--muted); font-size:9px; }
.entry-content { font-size:12px; line-height:1.6; color:#ccc; white-space:pre-wrap; word-wrap:break-word; }

.badge { font-size:9px; padding:1px 6px; border-radius:8px; font-weight:600; }
.badge.approved { background:rgba(102,187,106,0.2); color:#66bb6a; }
.badge.revise { background:rgba(255,112,67,0.2); color:#ff7043; }

/* Judge panel — collapsible below split */
.judge-panel { flex-shrink:0; border-top:1px solid var(--border); background:var(--surface); }
.judge-toggle { padding:10px 20px; cursor:pointer; font-size:11px; text-transform:uppercase; letter-spacing:1.5px; color:var(--muted); font-weight:600; list-style:none; user-select:none; }
.judge-toggle::-webkit-details-marker { display:none; }
.judge-toggle::before { content:'▸ '; }
.judge-panel[open] .judge-toggle::before { content:'▾ '; }
.judge-body { padding:4px 20px 16px; max-height:50vh; overflow-y:auto; }
table { width:100%; border-collapse:collapse; font-size:11px; }
tr { border-bottom:1px solid #1e1e1e; }
.jd { padding:5px 8px; font-weight:500; white-space:nowrap; color:#ccc; }
.js { padding:5px 6px; text-align:center; font-weight:700; }
.jb { padding:5px 8px; }
.jr { padding:5px 8px; color:var(--muted); font-size:10px; }
.bar-bg { background:#262626; border-radius:3px; height:5px; width:80px; }
.bar-fg { border-radius:3px; height:5px; }

.qual-item { margin-bottom:12px; }
.qual-label { font-weight:600; color:#90caf9; font-size:11px; margin-bottom:3px; }
.qual-text { color:#aaa; font-size:11px; line-height:1.6; }
.qual-ktp { margin:12px 0; padding:10px; background:#1a237e; border-radius:6px; }
.qual-tags { margin-top:8px; }
.tag { display:inline-block; padding:2px 8px; margin:2px; border-radius:10px; font-size:10px; font-weight:600; background:#263238; color:#80cbc4; }
</style>
</head>
<body>

<div class="top-bar">
<div>
<h1>${escapeHtml(scenario?.replace(/_/g, ' ').replace(/\b\w/g, c => c.toUpperCase()) || '')}</h1>
<div class="meta-grid">
<span class="meta-label">Cell</span><span class="meta-value">${escapeHtml(profile)}</span><span class="meta-value">${condLabel}${turnsSuffix}</span>
<span class="meta-label">Tutor</span><span class="meta-value">ego ${escapeHtml(shortModel(meta.egoModel))}</span><span class="meta-value">superego ${escapeHtml(superegoDisplayModel)}</span>
<span class="meta-label">Learner</span><span class="meta-value">ego ${escapeHtml(shortModel(meta.learnerEgoModel))}</span><span class="meta-value">superego ${escapeHtml(shortModel(meta.learnerSuperegoModel))}</span>
<span class="meta-label">Judge</span><span class="meta-value">${escapeHtml(shortModel(meta.judgeModel))}</span><span></span>
</div>
<div class="meta-id">${escapeHtml(meta.runId)} · ${escapeHtml(meta.dialogueId)}</div>
</div>
<span class="score-badge">${score}</span>
</div>

<div class="legend">
<span><span class="sw" style="background:#78909c"></span> Front stage</span>
<span><span class="sw" style="background:#ef5350"></span> L.Superego</span>
<span><span class="sw" style="background:#ab47bc"></span> L.Ego</span>
<span><span class="sw" style="background:#42a5f5"></span> T.Ego</span>
<span><span class="sw" style="background:#66bb6a"></span> T.Superego</span>
</div>

<div class="split">
<div class="left-pane">
<svg width="${svgWidth + 20}" height="${svgHeight}" xmlns="http://www.w3.org/2000/svg">${svg}</svg>
</div>
<div class="right-pane" id="transcript">
${transcriptHtml}
</div>
</div>

${(judgeRows || qualHtml) ? `<details class="judge-panel">
<summary class="judge-toggle">Judge Adjudication — ${score}/100</summary>
<div class="judge-body">
${judgeRows ? `<table>${judgeRows}</table>` : ''}
${qualHtml ? `<div style="margin-top:16px">${qualHtml}</div>` : ''}
</div>
</details>` : ''}

<script>
let activeStep = -1;
function highlight(idx) {
// Clear previous
document.querySelectorAll('.arrow-group.active').forEach(g => g.classList.remove('active'));
document.querySelectorAll('.entry.active').forEach(e => e.classList.remove('active'));

// Activate
const arrow = document.querySelector('.arrow-group[data-step="'+idx+'"]');
const entry = document.getElementById('entry-' + idx);
if (arrow) arrow.classList.add('active');
if (entry) {
entry.classList.add('active');
entry.scrollIntoView({ behavior: 'smooth', block: 'center' });
}
// Also scroll SVG to show the arrow
if (arrow) {
const rect = arrow.getBoundingClientRect();
const pane = document.querySelector('.left-pane');
const paneRect = pane.getBoundingClientRect();
if (rect.top < paneRect.top + 60 || rect.bottom > paneRect.bottom - 20) {
const y = pane.scrollTop + rect.top - paneRect.top - paneRect.height / 2;
pane.scrollTo({ top: y, behavior: 'smooth' });
}
}
activeStep = idx;
}

// Keyboard navigation
document.addEventListener('keydown', e => {
const maxStep = document.querySelectorAll('.arrow-group').length - 1;
if (e.key === 'ArrowDown' || e.key === 'j') { e.preventDefault(); highlight(Math.min(activeStep + 1, maxStep)); }
if (e.key === 'ArrowUp' || e.key === 'k') { e.preventDefault(); highlight(Math.max(activeStep - 1, 0)); }
});
</script>
</body>
</html>`;
}
|
|
618
|
+
|
|
619
|
+
// ── Main loop ────────────────────────────────────────────────────────────────
//
// Renders one standalone HTML sequence diagram per evaluation result:
//   1. find the dialogue log whose file name contains the result's dialogue_id,
//   2. convert its trace (consolidatedTrace, else dialogueTrace) into steps,
//   3. write sequence-<profile>-<scenario>-<score>.html into outputDir.
// Results with a missing id, log, trace or steps are reported and skipped.
// The database handle is closed even when rendering aborts with an error.

const rendered = [];

/** Formats an agent config entry as "provider.model"; either part may be absent. */
const describeModel = (agent) => (agent?.provider ? `${agent.provider}.` : '') + (agent?.model || '');

try {
  fs.mkdirSync(outputDir, { recursive: true });

  // Loop-invariant inputs: list the log directory and parse the learner
  // config once instead of once per result.
  const logFileNames = fs.readdirSync(LOGS_DIR);

  let learnerProfiles = {};
  try {
    const learnerYaml = YAML.parse(fs.readFileSync(path.join(process.cwd(), 'config/learner-agents.yaml'), 'utf8'));
    learnerProfiles = learnerYaml?.profiles ?? {};
  } catch (err) {
    // Best effort: the learner models are header decoration only, so a missing
    // or malformed config must not stop rendering — but say why they are blank.
    console.log(` ⚠ Could not load config/learner-agents.yaml (${err.message}); learner models left blank`);
  }

  /** Resolves the learner ego/superego model labels for a learner architecture name. */
  const resolveLearnerModels = (arch) => {
    const prof = learnerProfiles[arch];
    if (prof?.ego) {
      return { ego: describeModel(prof.ego), superego: describeModel(prof.superego) };
    }
    if (prof?.unified_learner) {
      // A unified learner has a single model; show it for both roles.
      const unified = describeModel(prof.unified_learner);
      return { ego: unified, superego: unified };
    }
    return { ego: '', superego: '' };
  };

  for (const result of results) {
    const dialogueId = result.dialogue_id;

    // String.prototype.includes() coerces undefined/null to "undefined"/"null",
    // which could match an unrelated file name — skip such rows explicitly.
    if (dialogueId === undefined || dialogueId === null || dialogueId === '') {
      console.log(' ⚠ Result has no dialogue_id, skipping');
      continue;
    }

    const logFiles = logFileNames.filter((f) => f.includes(dialogueId));

    if (logFiles.length === 0) {
      console.log(` ⚠ No log file for ${dialogueId}, skipping`);
      continue;
    }

    // When several files match, the first directory entry wins.
    const logPath = path.join(LOGS_DIR, logFiles[0]);

    let log;
    try {
      log = JSON.parse(fs.readFileSync(logPath, 'utf8'));
    } catch (err) {
      // One corrupt log should not abort the remaining diagrams.
      console.log(` ⚠ Unreadable log ${logFiles[0]} for ${dialogueId} (${err.message}), skipping`);
      continue;
    }
    if (log === null || typeof log !== 'object') {
      console.log(` ⚠ Unreadable log ${logFiles[0]} for ${dialogueId} (not a JSON object), skipping`);
      continue;
    }

    const trace = log.consolidatedTrace || log.dialogueTrace || [];

    if (trace.length === 0) {
      console.log(` ⚠ Empty trace for ${dialogueId}, skipping`);
      continue;
    }

    const steps = traceToSteps(trace);
    if (steps.length === 0) {
      console.log(` ⚠ No sequence steps for ${dialogueId}, skipping`);
      continue;
    }

    // Collect metadata from log + DB result for the header.
    // Learner models come from config, keyed by the log's learner architecture.
    const learnerModels = resolveLearnerModels(log.learnerArchitecture || 'unified');

    const meta = {
      runId,
      egoModel: result.ego_model || log.model || '',
      superegoModel: result.superego_model || '',
      judgeModel: result.judge_model || '',
      learnerArch: log.learnerArchitecture || '',
      learnerEgoModel: learnerModels.ego,
      learnerSuperegoModel: learnerModels.superego,
      totalTurns: log.totalTurns || '',
      dialogueId,
    };

    const html = generateHtml(result, steps, trace, meta);
    const filename = `sequence-${result.profile_name}-${result.scenario_id}-${result.overall_score?.toFixed(0) || '0'}.html`;
    const outPath = path.join(outputDir, filename);

    fs.writeFileSync(outPath, html);
    rendered.push(outPath);

    // Print "n/a" rather than "undefined" when the result has no score.
    const scoreLabel = result.overall_score?.toFixed(1) ?? 'n/a';
    console.log(` ✓ ${filename} (${steps.length} steps, score ${scoreLabel})`);
  }

  console.log(`\nRendered ${rendered.length} diagram(s) to ${outputDir}/`);

  if (shouldOpen && rendered.length > 0) {
    // Pick the platform's "open with default application" command;
    // the original only supported macOS `open`.
    const openers = { darwin: 'open', win32: 'start ""' };
    const opener = openers[process.platform] ?? 'xdg-open';
    try {
      execSync(`${opener} "${rendered[0]}"`);
      console.log(`Opened: ${path.basename(rendered[0])}`);
    } catch (err) {
      // Opening a viewer is a convenience only; report and carry on.
      console.log(` ⚠ Could not open ${path.basename(rendered[0])}: ${err.message}`);
    }
  }
} finally {
  db.close();
}
|