@machinespirits/eval 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/README.md +91 -9
  2. package/config/eval-settings.yaml +3 -3
  3. package/config/paper-manifest.json +486 -0
  4. package/config/providers.yaml +9 -6
  5. package/config/tutor-agents.yaml +2261 -0
  6. package/content/README.md +23 -0
  7. package/content/courses/479/course.md +53 -0
  8. package/content/courses/479/lecture-1.md +361 -0
  9. package/content/courses/479/lecture-2.md +360 -0
  10. package/content/courses/479/lecture-3.md +655 -0
  11. package/content/courses/479/lecture-4.md +530 -0
  12. package/content/courses/479/lecture-5.md +326 -0
  13. package/content/courses/479/lecture-6.md +346 -0
  14. package/content/courses/479/lecture-7.md +326 -0
  15. package/content/courses/479/lecture-8.md +273 -0
  16. package/content/courses/479/roadmap-slides.md +656 -0
  17. package/content/manifest.yaml +8 -0
  18. package/docs/research/build.sh +44 -20
  19. package/docs/research/figures/figure10.png +0 -0
  20. package/docs/research/figures/figure11.png +0 -0
  21. package/docs/research/figures/figure3.png +0 -0
  22. package/docs/research/figures/figure4.png +0 -0
  23. package/docs/research/figures/figure5.png +0 -0
  24. package/docs/research/figures/figure6.png +0 -0
  25. package/docs/research/figures/figure7.png +0 -0
  26. package/docs/research/figures/figure8.png +0 -0
  27. package/docs/research/figures/figure9.png +0 -0
  28. package/docs/research/header.tex +23 -2
  29. package/docs/research/paper-full.md +941 -285
  30. package/docs/research/paper-short.md +216 -585
  31. package/docs/research/references.bib +132 -0
  32. package/docs/research/slides-header.tex +188 -0
  33. package/docs/research/slides-pptx.md +363 -0
  34. package/docs/research/slides.md +531 -0
  35. package/docs/research/style-reference-pptx.py +199 -0
  36. package/package.json +6 -5
  37. package/scripts/analyze-eval-results.js +69 -17
  38. package/scripts/analyze-mechanism-traces.js +763 -0
  39. package/scripts/analyze-modulation-learning.js +498 -0
  40. package/scripts/analyze-prosthesis.js +144 -0
  41. package/scripts/analyze-run.js +264 -79
  42. package/scripts/assess-transcripts.js +853 -0
  43. package/scripts/browse-transcripts.js +854 -0
  44. package/scripts/check-parse-failures.js +73 -0
  45. package/scripts/code-dialectical-modulation.js +1320 -0
  46. package/scripts/download-data.sh +55 -0
  47. package/scripts/eval-cli.js +106 -18
  48. package/scripts/generate-paper-figures.js +663 -0
  49. package/scripts/generate-paper-figures.py +577 -76
  50. package/scripts/generate-paper-tables.js +299 -0
  51. package/scripts/qualitative-analysis-ai.js +3 -3
  52. package/scripts/render-sequence-diagram.js +694 -0
  53. package/scripts/test-latency.js +210 -0
  54. package/scripts/test-rate-limit.js +95 -0
  55. package/scripts/test-token-budget.js +332 -0
  56. package/scripts/validate-paper-manifest.js +670 -0
  57. package/services/__tests__/evalConfigLoader.test.js +2 -2
  58. package/services/__tests__/learnerRubricEvaluator.test.js +361 -0
  59. package/services/__tests__/learnerTutorInteractionEngine.test.js +326 -0
  60. package/services/evaluationRunner.js +975 -98
  61. package/services/evaluationStore.js +12 -4
  62. package/services/learnerTutorInteractionEngine.js +27 -2
  63. package/services/mockProvider.js +133 -0
  64. package/services/promptRewriter.js +1471 -5
  65. package/services/rubricEvaluator.js +55 -2
  66. package/services/transcriptFormatter.js +675 -0
  67. package/docs/EVALUATION-VARIABLES.md +0 -589
  68. package/docs/REPLICATION-PLAN.md +0 -577
  69. package/scripts/analyze-run.mjs +0 -282
  70. package/scripts/compare-runs.js +0 -44
  71. package/scripts/compare-suggestions.js +0 -80
  72. package/scripts/dig-into-run.js +0 -158
  73. package/scripts/show-failed-suggestions.js +0 -64
  74. /package/scripts/{check-run.mjs → check-run.js} +0 -0
@@ -0,0 +1,675 @@
1
+ /**
2
+ * Transcript Formatter
3
+ *
4
+ * Pure formatting module — takes a consolidatedTrace array and returns
5
+ * human-readable text in a play/dramaturgical format.
6
+ *
7
+ * Modes:
8
+ * play Full dramaturgical format with asides, reflections, and metadata
9
+ * compact Turn headers + final messages + superego verdicts (with metadata)
10
+ * messages-only Just the learner↔tutor exchange
11
+ * full Like play but includes raw metrics, token counts, model info per entry
12
+ * bilateral Dialogue-turn-level grouping for multi-turn bilateral traces:
13
+ * splits on learner turn_action boundaries, shows tutor then learner
14
+ * deliberation within each dialogue turn
15
+ */
16
+
17
// Layout constants shared by the formatters in this module.
// NOTE(review): INDENT and ASIDE_INDENT are identical single spaces here —
// confirm the intended widths were not collapsed when this file was extracted.

// Total column width used as the default for wrapping and for centering headers.
const DEFAULT_WIDTH = 72;

// Prefix for speaker labels and regular (non-aside) content.
const INDENT = ' ';

// Prefix for stage directions and for content printed beneath them.
const ASIDE_INDENT = ' ';
20
+
21
/**
 * Word-wrap text to a given width, respecting existing line breaks.
 *
 * Each input line is wrapped on its own. Whitespace-only lines become empty
 * output lines. Runs of whitespace inside a line collapse to a single space,
 * and leading/trailing whitespace on a line is discarded so that no output
 * line ends in a stray space. Words longer than the available width are kept
 * whole on their own line.
 *
 * @param {string} text - Text to wrap; falsy values yield ''.
 * @param {string} [indent=''] - Prefix added to every non-blank output line.
 * @param {number} [maxWidth=DEFAULT_WIDTH] - Total width, indent included.
 * @returns {string} The wrapped lines joined with '\n'.
 */
export function wrapText(text, indent = '', maxWidth = DEFAULT_WIDTH) {
  if (!text) return '';

  const effectiveWidth = maxWidth - indent.length;
  // Too little room to wrap sensibly: hand the text back unwrapped, with the
  // indent applied once at the front.
  if (effectiveWidth < 20) return indent + text;

  const wrapped = [];
  for (const line of text.split('\n')) {
    const trimmed = line.trim();
    if (trimmed === '') {
      wrapped.push('');
      continue;
    }

    let current = '';
    // Split the trimmed line: splitting the raw line would turn trailing
    // whitespace into an empty final "word" and leave a dangling space.
    for (const word of trimmed.split(/\s+/)) {
      const overflows = current.length + word.length + 1 > effectiveWidth;
      if (overflows && current.length > 0) {
        wrapped.push(indent + current);
        current = word;
      } else {
        current = current ? `${current} ${word}` : word;
      }
    }
    if (current) wrapped.push(indent + current);
  }

  return wrapped.join('\n');
}
52
+
53
/**
 * Derive a short display alias from a full model identifier.
 *
 * Keeps only the text after the last '/', drops everything from the first ':'
 * onward, and shortens aliases longer than 22 characters to their first 20
 * characters followed by '..'.
 *   "moonshot-ai/kimi-k2.5"               → "kimi-k2.5"
 *   "nvidia/nemotron-3-nano-30b-a3b:free" → "nemotron-3-nano-30b-.."
 *
 * @param {string} model - Full model string.
 * @returns {string|null} The alias, or null when model is falsy.
 */
function shortModel(model) {
  if (!model) {
    return null;
  }

  let alias = model;
  // Strip the provider prefix (everything up to the last slash).
  if (alias.includes('/')) {
    alias = alias.split('/').pop();
  }
  // Strip variant suffixes such as :free or :extended.
  [alias] = alias.split(':');

  if (alias.length <= 22) {
    return alias;
  }
  return alias.substring(0, 20) + '..';
}
67
+
68
/**
 * Render a latency in human-readable form.
 *
 * Values below 1000 are printed as-is with an "ms" suffix; anything else is
 * converted to seconds with one decimal place.
 *
 * @param {number|null|undefined} ms - Latency in milliseconds.
 * @returns {string|null} Formatted latency, or null when ms is null/undefined.
 */
function formatLatency(ms) {
  if (ms == null) {
    return null;
  }
  return ms < 1000 ? `${ms}ms` : `${(ms / 1000).toFixed(1)}s`;
}
76
+
77
/**
 * Render input/output token counts compactly, e.g. "120in/45out".
 *
 * A count that is null or undefined is omitted; a count of 0 is kept.
 *
 * @param {number|null|undefined} input - Input token count.
 * @param {number|null|undefined} output - Output token count.
 * @returns {string|null} Formatted counts, or null when both are missing.
 */
function formatTokens(input, output) {
  const labelled = [
    [input, 'in'],
    [output, 'out'],
  ];
  const parts = labelled
    .filter(([count]) => count != null)
    .map(([count, suffix]) => `${count}${suffix}`);

  return parts.length > 0 ? parts.join('/') : null;
}
87
+
88
/**
 * Render a cost as a dollar amount.
 *
 * Costs below 0.01 get four decimal places, everything else three.
 *
 * @param {number|null|undefined} cost - Cost value.
 * @returns {string|null} Formatted cost, or null when cost is missing or 0.
 */
function formatCost(cost) {
  const isMissing = cost == null || cost === 0;
  if (isMissing) {
    return null;
  }
  const decimals = cost < 0.01 ? 4 : 3;
  return `$${cost.toFixed(decimals)}`;
}
96
+
97
/**
 * Build the metadata subtitle for a trace entry: model alias, token counts,
 * latency and cost, joined by a middle dot.
 *
 * Values are read from entry.metrics ({model, inputTokens, outputTokens,
 * latencyMs, cost}); latency falls back to entry.latencyMs when the metrics
 * object has none. Pieces that are unavailable are left out.
 *
 * @param {Object} entry - Trace entry.
 * @param {string} detail - Detail mode; 'messages-only' suppresses metadata.
 * @returns {string|null} The subtitle, or null when there is nothing to show.
 */
function buildMetadataLine(entry, detail) {
  if (detail === 'messages-only') {
    return null;
  }

  const metrics = entry.metrics || {};
  const segments = [
    shortModel(metrics.model),
    formatTokens(metrics.inputTokens, metrics.outputTokens),
    formatLatency(metrics.latencyMs ?? entry.latencyMs),
    formatCost(metrics.cost),
  ].filter(Boolean);

  // Middle dot separator between the available pieces.
  return segments.length > 0 ? segments.join(' \u00b7 ') : null;
}
126
+
127
/**
 * Map a trace entry's agent/action pair to a readable speaker label.
 *
 * @param {Object} entry - Trace entry with `agent` and `action`.
 * @returns {string|null} The label; null for user final_output entries, which
 *   are skipped in output. Unrecognised agents fall back to the upper-cased
 *   agent name, or 'UNKNOWN' when the agent is missing.
 */
function getSpeakerLabel(entry) {
  const { agent, action } = entry;

  // Labels that depend on both the agent and the action performed.
  const byAgentAndAction = new Map([
    ['user', new Map([
      ['turn_action', 'LEARNER'],
      ['context_input', 'CONTEXT'],
      ['final_output', null], // skip in output
    ])],
    ['learner_ego', new Map([['deliberation', 'LEARNER EGO']])],
    ['learner_superego', new Map([['deliberation', 'LEARNER SUPEREGO']])],
    ['learner_synthesis', new Map([['response', 'LEARNER']])],
    ['ego', new Map([
      ['generate', 'TUTOR EGO (draft)'],
      ['revise', 'TUTOR EGO (revised)'],
      ['generate_final', 'TUTOR EGO'],
    ])],
    ['superego', new Map([['review', 'SUPEREGO']])],
  ]);
  const actionLabels = byAgentAndAction.get(agent);
  if (actionLabels?.has(action)) {
    return actionLabels.get(action);
  }

  // Labels decided by the agent alone: self-reflections, profiling, system/meta.
  const byAgent = new Map([
    ['ego_self_reflection', 'EGO'],
    ['superego_self_reflection', 'SUPEREGO'],
    ['superego_disposition', 'SUPEREGO'],
    ['ego_intersubjective', 'EGO'],
    ['tutor_other_ego', 'TUTOR'],
    ['learner_other_ego', 'LEARNER'],
    ['ego_strategy', 'EGO'],
    ['behavioral_overrides', 'SYSTEM'],
    ['rejection_budget', 'SYSTEM'],
  ]);
  if (byAgent.has(agent)) {
    return byAgent.get(agent);
  }

  return (agent || 'UNKNOWN').toUpperCase();
}
164
+
165
/**
 * Get the bracketed stage direction shown before an entry's content.
 *
 * @param {Object} entry - Trace entry with `agent`, `action` and, for superego
 *   reviews, `approved`.
 * @returns {string|null} The stage direction, or null when the entry has none.
 */
function getStageDirection(entry) {
  const { agent, action } = entry;

  switch (agent) {
    case 'superego':
      if (action !== 'review') return null;
      return entry.approved ? '[aside, to Ego \u2014 APPROVED]' : '[aside, to Ego]';

    case 'ego_self_reflection':
    case 'superego_self_reflection':
      return action === 'rewrite' ? '[reflecting]' : null;

    case 'superego_disposition':
      return action === 'rewrite' ? '[evolving disposition]' : null;

    case 'ego_intersubjective':
      return action === 'respond_to_critic' ? '[responding to critic]' : null;

    case 'tutor_other_ego':
      return action === 'profile_learner' ? '[profiling learner]' : null;

    case 'learner_other_ego':
      return action === 'profile_tutor' ? '[profiling tutor]' : null;

    case 'ego_strategy':
      return action === 'plan' ? '[planning strategy]' : null;

    case 'learner_ego':
    case 'learner_superego':
      return action === 'deliberation' ? '[internal]' : null;

    // System entries get the same direction whatever the action.
    case 'behavioral_overrides':
    case 'rejection_budget':
      return '[system]';

    default:
      return null;
  }
}
188
+
189
/**
 * Extract the displayable text from a trace entry.
 *
 * The rules are checked in order, and the first one that applies wins:
 *   1. superego review  → feedback, plus "\n[REVISE]" unless approved
 *   2. generate / revise / generate_final with suggestions → suggestion texts
 *   3. user turn_action → contextSummary, then detail
 *   4. user context_input → page/struggle/session summary and learner message
 *   5. reflection, profiling, planning, deliberation, response actions
 *      → detail, then contextSummary
 *   6. behavioral_overrides / rejection_budget → contextSummary, then detail
 *   7. anything else → detail, contextSummary, content, message
 *
 * @param {Object} entry - Trace entry.
 * @returns {string} Display text ('' when nothing is available).
 */
function getEntryContent(entry) {
  const { agent, action } = entry;
  const fromUser = agent === 'user';

  // 1. Superego review: feedback followed by the verdict marker.
  if (agent === 'superego' && action === 'review') {
    const feedback = entry.feedback || entry.verdict?.feedback || '';
    const marker = entry.approved ? '' : '\n[REVISE]';
    return feedback + marker;
  }

  // 2. Generation entries: one paragraph per suggestion.
  const generationActions = ['generate', 'revise', 'generate_final'];
  if (generationActions.includes(action) && entry.suggestions?.length > 0) {
    const paragraphs = entry.suggestions.map(
      (suggestion) => suggestion.message || suggestion.text || suggestion.title || JSON.stringify(suggestion),
    );
    return paragraphs.join('\n\n');
  }

  // 3. Learner turn action.
  if (fromUser && action === 'turn_action') {
    return entry.contextSummary || entry.detail || '';
  }

  // 4. Context input: summarise the context data and surface the learner's message.
  if (fromUser && action === 'context_input') {
    const ctx = entry.contextData || {};
    const parts = [];
    if (ctx.currentPage) {
      parts.push(ctx.currentPage.replace(/^\*+:\s*/, ''));
    }
    if (ctx.strugglesCount) {
      parts.push(`${ctx.strugglesCount} struggle signals`);
    }
    if (ctx.sessions) {
      parts.push(`${ctx.sessions} prior sessions`);
    }
    const summary = parts.join(', ');

    // The learner's message, when present, is embedded in the raw context text.
    const raw = entry.rawContext || '';
    const learnerMatch = raw.match(/Learner Messages?:\s*(.+?)(?:\n<\/|$)/s)
      || raw.match(/Recent Chat History\n-\s*User:\s*"(.+?)"/s);
    if (learnerMatch) {
      const prefix = summary ? summary + '\n\n' : '';
      return prefix + 'Learner: ' + learnerMatch[1].trim();
    }

    if (parts.length) {
      return summary;
    }
    return entry.contextSummary || '(scenario input)';
  }

  // 5. Reflection/rewrite, profiling, planning and learner deliberation entries.
  const detailFirstActions = [
    'rewrite', 'respond_to_critic', 'profile_learner', 'profile_tutor', 'plan',
    'deliberation', 'response',
  ];
  if (detailFirstActions.includes(action)) {
    return entry.detail || entry.contextSummary || '';
  }

  // 6. System entries.
  if (agent === 'behavioral_overrides' || agent === 'rejection_budget') {
    return entry.contextSummary || entry.detail || '';
  }

  // 7. Fallback.
  return entry.detail || entry.contextSummary || entry.content || entry.message || '';
}
248
+
249
/**
 * Tell whether an entry is "between-turn" reflection material, i.e. something
 * shown in the intermission rather than in the main exchange.
 *
 * @param {Object} entry - Trace entry; only `agent` is inspected.
 * @returns {boolean} True for self-reflection, profiling, strategy and
 *   system/meta agents.
 */
function isReflectionEntry(entry) {
  switch (entry.agent) {
    case 'ego_self_reflection':
    case 'superego_self_reflection':
    case 'superego_disposition':
    case 'ego_intersubjective':
    case 'behavioral_overrides':
    case 'rejection_budget':
    case 'tutor_other_ego':
    case 'learner_other_ego':
    case 'ego_strategy':
      return true;
    default:
      return false;
  }
}
260
+
261
/**
 * Decide whether an entry is shown in compact mode.
 *
 * Compact mode keeps learner messages, final tutor output and superego
 * verdicts; everything else (including user final_output markers) is hidden.
 *
 * @param {Object} entry - Trace entry; `_hasRevision` marks an ego draft that
 *   a later entry supersedes.
 * @returns {boolean} True when the entry should be rendered.
 */
function isCompactVisible(entry) {
  const { agent, action } = entry;

  const isLearnerMessage = agent === 'user' && action === 'turn_action';
  const isRevisedOutput = action === 'revise' || action === 'generate_final';
  // A draft counts as the final output only when nothing supersedes it.
  const isUnrevisedDraft = agent === 'ego' && action === 'generate' && !entry._hasRevision;
  const isVerdict = agent === 'superego' && action === 'review';

  return isLearnerMessage || isRevisedOutput || isUnrevisedDraft || isVerdict;
}
274
+
275
/**
 * Decide whether an entry is shown in messages-only mode, which keeps just the
 * learner↔tutor exchange.
 *
 * Visible entries are learner turn actions, learner synthesis responses, and
 * the tutor's final output for a turn: a revise or generate_final entry, or an
 * ego draft that nothing supersedes.
 *
 * generate_final is included because formatTranscript marks a draft as
 * superseded when a generate_final follows it; hiding both would drop the
 * tutor's message from the transcript entirely.
 *
 * @param {Object} entry - Trace entry; `_hasRevision` marks an ego draft that
 *   a later entry supersedes.
 * @returns {boolean} True when the entry should be rendered.
 */
function isMessageVisible(entry) {
  const { agent, action } = entry;
  if (agent === 'user' && action === 'turn_action') return true;
  if (action === 'revise' || action === 'generate_final') return true;
  if (agent === 'ego' && action === 'generate' && !entry._hasRevision) return true;
  if (agent === 'learner_synthesis' && action === 'response') return true;
  return false;
}
286
+
287
/**
 * Format a single trace entry as a block of text: speaker label, optional
 * metadata subtitle, optional stage direction, wrapped content and — in
 * 'full' mode — a bracketed line with timestamp, generation id and finish
 * reason when those are present.
 *
 * @param {Object} entry - Trace entry.
 * @param {Object} [options]
 * @param {string} [options.detail='play'] - Detail mode. 'messages-only'
 *   hides metadata and stage directions; 'full' appends the raw extras.
 * @returns {string|null} The formatted block, or null when the entry has no
 *   speaker label or has neither content nor a stage direction.
 */
export function formatEntry(entry, options = {}) {
  const { detail = 'play' } = options;

  const speaker = getSpeakerLabel(entry);
  if (!speaker) {
    return null;
  }

  const direction = getStageDirection(entry);
  const content = getEntryContent(entry);
  if (!content && !direction) {
    return null;
  }

  // Speaker name always comes first.
  const out = [INDENT + speaker];

  // Metadata subtitle (model, tokens, time, cost); absent in messages-only mode.
  const metaLine = buildMetadataLine(entry, detail);
  if (metaLine) {
    out.push(INDENT + ' ' + metaLine);
  }

  if (direction && detail !== 'messages-only') {
    out.push(ASIDE_INDENT + direction);
  }

  // Content sits under the stage direction's indent when the entry has one.
  if (content) {
    out.push(wrapText(content, direction ? ASIDE_INDENT : INDENT, DEFAULT_WIDTH));
  }

  if (detail === 'full') {
    const extras = [
      ['time', entry.timestamp],
      ['gen', entry.metrics?.generationId],
      ['finish', entry.metrics?.finishReason],
    ]
      .filter(([, value]) => value)
      .map(([key, value]) => `${key}=${value}`);
    if (extras.length > 0) {
      out.push(ASIDE_INDENT + `[${extras.join(', ')}]`);
    }
  }

  return out.join('\n');
}
334
+
335
/**
 * Format a full transcript from a consolidated trace array.
 *
 * Entries are grouped by turnIndex (missing indices count as 0) and each group
 * is rendered as an "ACT". Within an act the main entries come first, then the
 * reflection entries as an intermission. 'bilateral' mode is delegated to
 * formatBilateralTranscript.
 *
 * @param {Array} trace - The consolidatedTrace array
 * @param {Object} options
 * @param {string} options.detail - 'play' | 'compact' | 'messages-only' | 'full' | 'bilateral'
 * @param {string} options.scenarioName - Scenario title for the header
 * @param {string} options.profileName - Cell/profile name
 * @param {number} options.totalTurns - Total number of dialogue turns
 * @returns {string} Formatted transcript text ('(empty trace)\n' for a missing
 *   or empty trace)
 */
export function formatTranscript(trace, options = {}) {
  const { detail = 'play', scenarioName = '', profileName = '', totalTurns = 0 } = options;

  if (!trace || trace.length === 0) {
    return '(empty trace)\n';
  }

  // Bilateral mode groups by dialogue turn rather than by turnIndex.
  if (detail === 'bilateral') {
    return formatBilateralTranscript(trace, options);
  }

  // Work on shallow copies; ego drafts are flagged when a later ego entry in
  // the same turn revises or finalises them.
  const supersedes = (later) =>
    later.agent === 'ego' && (later.action === 'revise' || later.action === 'generate_final');
  const processed = trace.map((entry, index) => {
    const isDraft = entry.agent === 'ego' && entry.action === 'generate';
    if (!isDraft) {
      return { ...entry };
    }
    const revisedLater = trace.some(
      (later, laterIndex) =>
        laterIndex > index && later.turnIndex === entry.turnIndex && supersedes(later),
    );
    return { ...entry, _hasRevision: revisedLater };
  });

  const out = [];
  const center = (text) =>
    ' '.repeat(Math.max(0, Math.floor((DEFAULT_WIDTH - text.length) / 2))) + text;
  // Append a formatted block followed by a blank line; skipped entries are null.
  const emit = (formatted) => {
    if (formatted) {
      out.push(formatted, '');
    }
  };

  // Header
  if (scenarioName || profileName) {
    out.push('');
    if (scenarioName) {
      const title = scenarioName.toUpperCase();
      out.push(center(totalTurns > 0 ? `${title} (${totalTurns}-turn)` : title));
    }
    if (profileName) {
      out.push(center(profileName));
    }
    out.push(center('\u2500'.repeat(Math.min(DEFAULT_WIDTH - 10, 40))), '');
  }

  // Bucket entries by turn, keeping their original order inside each bucket.
  const byTurn = new Map();
  for (const entry of processed) {
    const key = entry.turnIndex ?? 0;
    const bucket = byTurn.get(key);
    if (bucket) {
      bucket.push(entry);
    } else {
      byTurn.set(key, [entry]);
    }
  }
  const turnOrder = [...byTurn.keys()].sort((a, b) => a - b);

  for (const turnIdx of turnOrder) {
    out.push('', center(`ACT ${turnIdx + 1}`), '');

    // Separate the main exchange from intermission material.
    const mainEntries = [];
    const reflections = [];
    for (const entry of byTurn.get(turnIdx)) {
      (isReflectionEntry(entry) ? reflections : mainEntries).push(entry);
    }

    for (const entry of mainEntries) {
      const hidden =
        (detail === 'compact' && !isCompactVisible(entry)) ||
        (detail === 'messages-only' && !isMessageVisible(entry));
      if (hidden) continue;
      emit(formatEntry(entry, { detail }));
    }

    // Reflections (intermission) are never shown in messages-only mode.
    if (reflections.length === 0 || detail === 'messages-only') continue;

    const compact = detail === 'compact';
    if (!compact) {
      out.push(center('~~~ intermission ~~~'), '');
    }
    for (const entry of reflections) {
      if (!compact) {
        emit(formatEntry(entry, { detail }));
        continue;
      }
      // Compact mode: a one-line summary per reflection.
      const speaker = getSpeakerLabel(entry);
      const summary = (entry.contextSummary || entry.detail || '').substring(0, 80);
      if (speaker && summary) {
        out.push(`${INDENT}[${speaker}] ${summary}`, '');
      }
    }
  }

  return out.join('\n');
}
452
+
453
/**
 * Classify a trace entry as belonging to the tutor phase (true) or not (false).
 *
 * Tutor-phase entries are those from the tutor's own agents, from the
 * 'system' agent, and the user context_input / final_output entries that
 * bookend the tutor phase.
 *
 * @param {Object} entry - Trace entry with `agent` and `action`.
 * @returns {boolean}
 */
function isTutorEntry(entry) {
  switch (entry.agent) {
    case 'ego':
    case 'superego':
    case 'ego_self_reflection':
    case 'superego_self_reflection':
    case 'superego_disposition':
    case 'ego_intersubjective':
    case 'tutor_other_ego':
    case 'ego_strategy':
    case 'behavioral_overrides':
    case 'rejection_budget':
    case 'system': // system entries (memory_cycle, etc.) belong to tutor phase
      return true;
    case 'user':
      return entry.action === 'context_input' || entry.action === 'final_output';
    default:
      return false;
  }
}
469
+
470
/**
 * Format a bilateral transcript: the trace is cut into dialogue turns, and
 * each turn shows the tutor's deliberation, the learner's deliberation, the
 * learner's message and any reflections, in that order.
 *
 * A dialogue turn ends at each user turn_action entry; entries left over after
 * the last turn_action form a final turn of their own.
 *
 *   TURN 1
 *     ── TUTOR DELIBERATION ──    tutor-phase entries (final_output markers
 *                                 skipped; context_input shown in turn 1 only)
 *     ── LEARNER DELIBERATION ──  remaining entries, minus learner_synthesis
 *                                 responses
 *     ── LEARNER MESSAGE ──       the user turn_action
 *     ~~~ intermission ~~~        reflection entries
 *   TURN 2
 *     ...
 *
 * Every entry is rendered with formatEntry in 'play' detail.
 *
 * @param {Array} trace - The consolidatedTrace array.
 * @param {Object} [options]
 * @param {string} [options.scenarioName=''] - Scenario title for the header.
 * @param {string} [options.profileName=''] - Cell/profile name for the header.
 * @param {number} [options.totalTurns=0] - Turn count shown beside the title.
 * @returns {string} Formatted transcript text.
 */
function formatBilateralTranscript(trace, options = {}) {
  const { scenarioName = '', profileName = '', totalTurns = 0 } = options;

  // Shallow-copy every entry and flag ego drafts that any later ego entry
  // revises. The flag is not read anywhere else in this function.
  const revisionActions = ['revise', 'generate_final', 'incorporate-feedback'];
  const processed = trace.map((entry, index) => {
    if (entry.agent !== 'ego' || entry.action !== 'generate') {
      return { ...entry };
    }
    const revisedLater = trace.some(
      (later, laterIndex) =>
        laterIndex > index && later.agent === 'ego' && revisionActions.includes(later.action),
    );
    return { ...entry, _hasRevision: revisedLater };
  });

  const isLearnerMessage = (entry) => entry.agent === 'user' && entry.action === 'turn_action';

  // Cut the trace into dialogue turns: a turn_action closes the current turn.
  const dialogueTurns = [];
  let pending = [];
  for (const entry of processed) {
    pending.push(entry);
    if (isLearnerMessage(entry)) {
      dialogueTurns.push(pending);
      pending = [];
    }
  }
  // Trailing entries with no learner response still form a turn.
  if (pending.length > 0) {
    dialogueTurns.push(pending);
  }

  const out = [];
  const center = (text) =>
    ' '.repeat(Math.max(0, Math.floor((DEFAULT_WIDTH - text.length) / 2))) + text;

  // Header
  if (scenarioName || profileName) {
    out.push('');
    if (scenarioName) {
      const title = scenarioName.toUpperCase();
      out.push(center(totalTurns > 0 ? `${title} (${totalTurns}-turn)` : title));
    }
    if (profileName) {
      out.push(center(profileName));
    }
    out.push(center('\u2500'.repeat(Math.min(DEFAULT_WIDTH - 10, 40))), '');
  }

  const PHASE_LINE = '\u2500'.repeat(30);
  const phaseHeader = (title) => INDENT + `\u2500\u2500 ${title} ${PHASE_LINE}`;
  // Render one entry in 'play' detail, followed by a blank line.
  const emit = (entry) => {
    const formatted = formatEntry(entry, { detail: 'play' });
    if (formatted) {
      out.push(formatted, '');
    }
  };

  dialogueTurns.forEach((entries, turnNum) => {
    out.push('', center(`TURN ${turnNum + 1}`), '');

    // Sort the turn's entries into phases; reflections take precedence.
    const tutorEntries = [];
    const learnerDeliberation = [];
    const reflections = [];
    let learnerMessage = null;
    for (const entry of entries) {
      if (isReflectionEntry(entry)) {
        reflections.push(entry);
        continue;
      }
      if (isLearnerMessage(entry)) {
        learnerMessage = entry;
        continue;
      }
      (isTutorEntry(entry) ? tutorEntries : learnerDeliberation).push(entry);
    }

    // ── TUTOR DELIBERATION ──
    if (tutorEntries.length > 0) {
      out.push(phaseHeader('TUTOR DELIBERATION'), '');
      for (const entry of tutorEntries) {
        if (entry.agent !== 'user') {
          emit(entry);
          continue;
        }
        // final_output markers are structural, not content.
        if (entry.action === 'final_output') continue;
        // context_input after the first turn repeats the same scenario data.
        if (entry.action === 'context_input' && turnNum > 0) continue;
        emit(entry);
      }
    }

    // ── LEARNER DELIBERATION ──
    // learner_synthesis responses duplicate the turn_action shown below.
    const deliberationOnly = learnerDeliberation.filter(
      (entry) => entry.agent !== 'learner_synthesis' || entry.action !== 'response',
    );
    if (deliberationOnly.length > 0) {
      out.push(phaseHeader('LEARNER DELIBERATION'), '');
      deliberationOnly.forEach(emit);
    }

    // ── LEARNER MESSAGE ── (the external turn_action)
    if (learnerMessage) {
      out.push(phaseHeader('LEARNER MESSAGE'), '');
      emit(learnerMessage);
    }

    // Between-turn reflections (intermission)
    if (reflections.length > 0) {
      out.push(center('~~~ intermission ~~~'), '');
      reflections.forEach(emit);
    }
  });

  return out.join('\n');
}
636
+
637
/**
 * Format a single entry as one line for incremental/streaming output.
 * Used for live console output during runs.
 *
 * Lines are produced for learner messages, superego reviews, final tutor
 * output (a revise or generate_final entry, or an ego draft not flagged with
 * `_hasRevision`) and reflection entries. Everything else yields null.
 *
 * Brought in line with the other formatters in this module:
 *   - generate_final is treated as final tutor output, as in compact mode;
 *   - suggestion text falls back through message → text → title, matching the
 *     fields the full formatter reads;
 *   - reflection summaries fall back to `detail` when `contextSummary` is
 *     empty, as compact transcripts do.
 *
 * @param {Object} entry - Trace entry.
 * @returns {string|null} A one-line summary, or null when the entry is skipped.
 */
export function formatCompactLine(entry) {
  const speaker = getSpeakerLabel(entry);
  if (!speaker) return null;

  const { agent, action } = entry;
  const meta = buildMetadataLine(entry, 'compact');
  const metaSuffix = meta ? ` (${meta})` : '';

  // Learner message
  if (agent === 'user' && action === 'turn_action') {
    const msg = (entry.contextSummary || entry.detail || '').substring(0, 120);
    return ` [LEARNER] ${msg}`;
  }

  // Superego review
  if (agent === 'superego' && action === 'review') {
    const verdict = entry.approved ? 'APPROVED' : 'REVISE';
    const feedback = (entry.feedback || entry.verdict?.feedback || '').substring(0, 80);
    return ` [SUPEREGO ${verdict}]${metaSuffix} ${feedback}`;
  }

  // Final tutor output (revised, finalised, or an unrevised initial draft)
  const isFinalTutorOutput =
    action === 'revise' ||
    action === 'generate_final' ||
    (agent === 'ego' && action === 'generate' && !entry._hasRevision);
  if (isFinalTutorOutput) {
    const msg = (entry.suggestions || [])
      .map((s) => (s.message || s.text || s.title || '').substring(0, 80))
      .join('; ');
    return ` [TUTOR]${metaSuffix} ${msg}`;
  }

  // Reflections (compact one-liner)
  if (isReflectionEntry(entry)) {
    const summary = (entry.contextSummary || entry.detail || '').substring(0, 80);
    return ` [${speaker}] ${summary}`;
  }

  return null;
}