@machinespirits/eval 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -9
- package/config/eval-settings.yaml +3 -3
- package/config/paper-manifest.json +486 -0
- package/config/providers.yaml +9 -6
- package/config/tutor-agents.yaml +2261 -0
- package/content/README.md +23 -0
- package/content/courses/479/course.md +53 -0
- package/content/courses/479/lecture-1.md +361 -0
- package/content/courses/479/lecture-2.md +360 -0
- package/content/courses/479/lecture-3.md +655 -0
- package/content/courses/479/lecture-4.md +530 -0
- package/content/courses/479/lecture-5.md +326 -0
- package/content/courses/479/lecture-6.md +346 -0
- package/content/courses/479/lecture-7.md +326 -0
- package/content/courses/479/lecture-8.md +273 -0
- package/content/courses/479/roadmap-slides.md +656 -0
- package/content/manifest.yaml +8 -0
- package/docs/research/build.sh +44 -20
- package/docs/research/figures/figure10.png +0 -0
- package/docs/research/figures/figure11.png +0 -0
- package/docs/research/figures/figure3.png +0 -0
- package/docs/research/figures/figure4.png +0 -0
- package/docs/research/figures/figure5.png +0 -0
- package/docs/research/figures/figure6.png +0 -0
- package/docs/research/figures/figure7.png +0 -0
- package/docs/research/figures/figure8.png +0 -0
- package/docs/research/figures/figure9.png +0 -0
- package/docs/research/header.tex +23 -2
- package/docs/research/paper-full.md +941 -285
- package/docs/research/paper-short.md +216 -585
- package/docs/research/references.bib +132 -0
- package/docs/research/slides-header.tex +188 -0
- package/docs/research/slides-pptx.md +363 -0
- package/docs/research/slides.md +531 -0
- package/docs/research/style-reference-pptx.py +199 -0
- package/package.json +6 -5
- package/scripts/analyze-eval-results.js +69 -17
- package/scripts/analyze-mechanism-traces.js +763 -0
- package/scripts/analyze-modulation-learning.js +498 -0
- package/scripts/analyze-prosthesis.js +144 -0
- package/scripts/analyze-run.js +264 -79
- package/scripts/assess-transcripts.js +853 -0
- package/scripts/browse-transcripts.js +854 -0
- package/scripts/check-parse-failures.js +73 -0
- package/scripts/code-dialectical-modulation.js +1320 -0
- package/scripts/download-data.sh +55 -0
- package/scripts/eval-cli.js +106 -18
- package/scripts/generate-paper-figures.js +663 -0
- package/scripts/generate-paper-figures.py +577 -76
- package/scripts/generate-paper-tables.js +299 -0
- package/scripts/qualitative-analysis-ai.js +3 -3
- package/scripts/render-sequence-diagram.js +694 -0
- package/scripts/test-latency.js +210 -0
- package/scripts/test-rate-limit.js +95 -0
- package/scripts/test-token-budget.js +332 -0
- package/scripts/validate-paper-manifest.js +670 -0
- package/services/__tests__/evalConfigLoader.test.js +2 -2
- package/services/__tests__/learnerRubricEvaluator.test.js +361 -0
- package/services/__tests__/learnerTutorInteractionEngine.test.js +326 -0
- package/services/evaluationRunner.js +975 -98
- package/services/evaluationStore.js +12 -4
- package/services/learnerTutorInteractionEngine.js +27 -2
- package/services/mockProvider.js +133 -0
- package/services/promptRewriter.js +1471 -5
- package/services/rubricEvaluator.js +55 -2
- package/services/transcriptFormatter.js +675 -0
- package/docs/EVALUATION-VARIABLES.md +0 -589
- package/docs/REPLICATION-PLAN.md +0 -577
- package/scripts/analyze-run.mjs +0 -282
- package/scripts/compare-runs.js +0 -44
- package/scripts/compare-suggestions.js +0 -80
- package/scripts/dig-into-run.js +0 -158
- package/scripts/show-failed-suggestions.js +0 -64
- /package/scripts/{check-run.mjs → check-run.js} +0 -0
|
@@ -0,0 +1,675 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transcript Formatter
|
|
3
|
+
*
|
|
4
|
+
* Pure formatting module — takes a consolidatedTrace array and returns
|
|
5
|
+
* human-readable text in a play/dramaturgical format.
|
|
6
|
+
*
|
|
7
|
+
* Modes:
|
|
8
|
+
* play Full dramaturgical format with asides, reflections, and metadata
|
|
9
|
+
* compact Turn headers + final messages + superego verdicts (with metadata)
|
|
10
|
+
* messages-only Just the learner↔tutor exchange
|
|
11
|
+
* full Like play but includes raw metrics, token counts, model info per entry
|
|
12
|
+
* bilateral Dialogue-turn-level grouping for multi-turn bilateral traces:
|
|
13
|
+
* splits on turn_action boundaries, shows tutor then learner
|
|
14
|
+
* deliberation within each dialogue turn
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
// Total line width (in characters) used as the wrap limit and for centering headers.
const DEFAULT_WIDTH = 72;
|
|
18
|
+
// Base indent prepended to speaker labels and to content that has no stage direction.
const INDENT = ' ';
|
|
19
|
+
// Indent for stage directions and for the content printed beneath them.
// NOTE(review): this literal reads identical to INDENT in this view; whitespace
// may have been collapsed in transit — confirm the intended widths.
const ASIDE_INDENT = ' ';
|
|
20
|
+
|
|
21
|
+
/**
 * Word-wrap text to a given width, respecting existing line breaks.
 *
 * Each input line is wrapped independently. Whitespace-only lines are kept as
 * empty lines (without indent). A single word longer than the available width
 * is left intact on its own line rather than being split.
 *
 * @param {string} text - Text to wrap; falsy input yields ''.
 * @param {string} [indent=''] - Prefix added to every non-blank output line.
 * @param {number} [maxWidth=DEFAULT_WIDTH] - Total line width, indent included.
 * @returns {string} The wrapped text, lines joined with '\n'.
 */
export function wrapText(text, indent = '', maxWidth = DEFAULT_WIDTH) {
  if (!text) return '';
  const effectiveWidth = maxWidth - indent.length;
  // Too narrow to wrap sensibly: hand the text back unwrapped behind the indent.
  if (effectiveWidth < 20) return indent + text;

  const wrapped = [];
  for (const line of text.split('\n')) {
    // Trim before splitting so leading/trailing whitespace (including a '\r'
    // left over from CRLF input) cannot produce empty "words" that would
    // leave a dangling space at the end of an output line.
    const trimmed = line.trim();
    if (trimmed === '') {
      wrapped.push('');
      continue;
    }

    let current = '';
    for (const word of trimmed.split(/\s+/)) {
      const overflows = current.length + word.length + 1 > effectiveWidth;
      if (overflows && current.length > 0) {
        wrapped.push(indent + current);
        current = word;
      } else {
        current = current ? `${current} ${word}` : word;
      }
    }
    if (current) wrapped.push(indent + current);
  }

  return wrapped.join('\n');
}
|
|
52
|
+
|
|
53
|
+
/**
 * Derive a short display alias from a full model identifier.
 *
 * Keeps only the segment after the last '/', drops anything from the first
 * ':' onward, and shortens results longer than 22 characters to their first
 * 20 characters followed by '..'.
 *
 * e.g. "moonshot-ai/kimi-k2.5"               → "kimi-k2.5"
 *      "nvidia/nemotron-3-nano-30b-a3b:free" → "nemotron-3-nano-30b-.."
 *
 * @param {string} model - Full model string; falsy input yields null.
 * @returns {string|null} The shortened alias, or null when no model is given.
 */
function shortModel(model) {
  if (!model) return null;

  // Everything after the final provider separator.
  const lastSlash = model.lastIndexOf('/');
  const tail = lastSlash === -1 ? model : model.slice(lastSlash + 1);

  // Drop variant suffixes such as ":free" or ":extended".
  const [bare] = tail.split(':');

  if (bare.length <= 22) return bare;
  return `${bare.slice(0, 20)}..`;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Format a latency in human-readable form.
 *
 * Values that round to less than one second are shown as whole milliseconds
 * ("250ms"); everything else is shown in seconds with one decimal ("1.5s").
 *
 * @param {number|null|undefined} ms - Latency in milliseconds.
 * @returns {string|null} The formatted latency, or null when ms is null/undefined.
 */
function formatLatency(ms) {
  if (ms == null) return null;
  // Round first so fractional timings print as "123ms" rather than
  // "123.456ms", and so 999.5–999.99 rolls over to "1.0s" instead of "1000ms".
  const rounded = Math.round(ms);
  if (rounded < 1000) return `${rounded}ms`;
  return `${(ms / 1000).toFixed(1)}s`;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Render input/output token counts as a compact "<n>in/<m>out" label.
 *
 * Either side is omitted when its count is null/undefined.
 *
 * @param {number|null|undefined} input - Input token count.
 * @param {number|null|undefined} output - Output token count.
 * @returns {string|null} The label, or null when neither count is available.
 */
function formatTokens(input, output) {
  const labelled = [
    [input, 'in'],
    [output, 'out'],
  ]
    .filter(([count]) => count != null)
    .map(([count, suffix]) => `${count}${suffix}`);

  return labelled.length === 0 ? null : labelled.join('/');
}
|
|
87
|
+
|
|
88
|
+
/**
 * Render a cost as a dollar amount.
 *
 * Amounts below 0.01 get four decimals; all others get three.
 *
 * @param {number|null|undefined} cost - Cost value.
 * @returns {string|null} e.g. "$0.0042" or "$0.250"; null when cost is
 *   null, undefined, or exactly 0.
 */
function formatCost(cost) {
  if (cost == null || cost === 0) return null;
  const digits = cost < 0.01 ? 4 : 3;
  return `$${cost.toFixed(digits)}`;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Build the metadata subtitle for a trace entry.
 *
 * Reads model, inputTokens, outputTokens, latencyMs and cost from
 * entry.metrics (latency falls back to entry.latencyMs) and joins whichever
 * pieces are available with a middle dot.
 *
 * @param {Object} entry - Trace entry; entry.metrics may be absent.
 * @param {string} detail - Detail mode; 'messages-only' suppresses metadata.
 * @returns {string|null} The subtitle, or null when suppressed or empty.
 */
function buildMetadataLine(entry, detail) {
  if (detail === 'messages-only') return null;

  const metrics = entry.metrics || {};
  const segments = [
    shortModel(metrics.model),
    formatTokens(metrics.inputTokens, metrics.outputTokens),
    formatLatency(metrics.latencyMs ?? entry.latencyMs),
    formatCost(metrics.cost),
  ].filter(Boolean);

  // Middle dot separator between the available pieces.
  return segments.length > 0 ? segments.join(' \u00b7 ') : null;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Resolve the speaker label shown for a trace entry.
 *
 * Lookup order: exact agent + action pair, then agent alone, then the
 * upper-cased agent name ('UNKNOWN' when the agent is missing).
 *
 * @param {Object} entry - Trace entry with `agent` and `action`.
 * @returns {string|null} The label, or null for entries that are skipped in
 *   output (user final_output markers).
 */
function getSpeakerLabel(entry) {
  const { agent, action } = entry;

  // Labels that depend on the exact agent + action combination.
  const labelsByAgentAndAction = new Map([
    [
      'user',
      new Map([
        ['turn_action', 'LEARNER'],
        ['context_input', 'CONTEXT'],
        ['final_output', null], // skip in output
      ]),
    ],
    ['learner_ego', new Map([['deliberation', 'LEARNER EGO']])],
    ['learner_superego', new Map([['deliberation', 'LEARNER SUPEREGO']])],
    ['learner_synthesis', new Map([['response', 'LEARNER']])],
    [
      'ego',
      new Map([
        ['generate', 'TUTOR EGO (draft)'],
        ['revise', 'TUTOR EGO (revised)'],
        ['generate_final', 'TUTOR EGO'],
      ]),
    ],
    ['superego', new Map([['review', 'SUPEREGO']])],
  ]);
  const actionLabels = labelsByAgentAndAction.get(agent);
  if (actionLabels?.has(action)) return actionLabels.get(action);

  // Labels that apply to an agent regardless of action.
  const labelsByAgent = new Map([
    // Self-reflections
    ['ego_self_reflection', 'EGO'],
    ['superego_self_reflection', 'SUPEREGO'],
    ['superego_disposition', 'SUPEREGO'],
    ['ego_intersubjective', 'EGO'],
    // Profiling
    ['tutor_other_ego', 'TUTOR'],
    ['learner_other_ego', 'LEARNER'],
    ['ego_strategy', 'EGO'],
    // System/meta
    ['behavioral_overrides', 'SYSTEM'],
    ['rejection_budget', 'SYSTEM'],
  ]);
  if (labelsByAgent.has(agent)) return labelsByAgent.get(agent);

  return (agent || 'UNKNOWN').toUpperCase();
}
|
|
164
|
+
|
|
165
|
+
/**
 * Pick the bracketed stage direction printed before an entry's content.
 *
 * @param {Object} entry - Trace entry with `agent`, `action`, and (for
 *   superego reviews) `approved`.
 * @returns {string|null} The stage direction, or null when the entry has none.
 */
function getStageDirection(entry) {
  const { agent, action } = entry;

  switch (agent) {
    case 'superego':
      if (action !== 'review') return null;
      return entry.approved ? '[aside, to Ego \u2014 APPROVED]' : '[aside, to Ego]';

    case 'ego_self_reflection':
    case 'superego_self_reflection':
      return action === 'rewrite' ? '[reflecting]' : null;

    case 'superego_disposition':
      return action === 'rewrite' ? '[evolving disposition]' : null;

    case 'ego_intersubjective':
      return action === 'respond_to_critic' ? '[responding to critic]' : null;

    case 'tutor_other_ego':
      return action === 'profile_learner' ? '[profiling learner]' : null;

    case 'learner_other_ego':
      return action === 'profile_tutor' ? '[profiling tutor]' : null;

    case 'ego_strategy':
      return action === 'plan' ? '[planning strategy]' : null;

    case 'learner_ego':
    case 'learner_superego':
      return action === 'deliberation' ? '[internal]' : null;

    // System entries get the same direction whatever the action.
    case 'behavioral_overrides':
    case 'rejection_budget':
      return '[system]';

    default:
      return null;
  }
}
|
|
188
|
+
|
|
189
|
+
/**
 * Pull the text to display for a trace entry.
 *
 * Which fields are read depends on the entry's agent/action:
 *  - superego review: feedback, plus a "[REVISE]" marker when not approved
 *  - generate / revise / generate_final with suggestions: each suggestion's
 *    message, text or title (JSON as a last resort), separated by blank lines
 *  - user turn_action: contextSummary, then detail
 *  - user context_input: a summary of contextData plus the learner message
 *    extracted from rawContext when one can be found
 *  - reflection / profiling / planning / deliberation / response actions:
 *    detail, then contextSummary
 *  - behavioral_overrides / rejection_budget: contextSummary, then detail
 *  - anything else: detail, contextSummary, content, or message
 *
 * @param {Object} entry - Trace entry.
 * @returns {string} Displayable content; '' when nothing is available.
 */
function getEntryContent(entry) {
  const { agent, action } = entry;
  const detailFirst = () => entry.detail || entry.contextSummary || '';
  const summaryFirst = () => entry.contextSummary || entry.detail || '';

  // Superego review: feedback followed by the verdict marker.
  if (agent === 'superego' && action === 'review') {
    const feedback = entry.feedback || entry.verdict?.feedback || '';
    return feedback + (entry.approved ? '' : '\n[REVISE]');
  }

  // Generation entries: one paragraph per suggestion.
  const generationActions = ['generate', 'revise', 'generate_final'];
  if (generationActions.includes(action) && entry.suggestions?.length > 0) {
    return entry.suggestions
      .map((suggestion) => suggestion.message || suggestion.text || suggestion.title || JSON.stringify(suggestion))
      .join('\n\n');
  }

  if (agent === 'user') {
    // Learner turn action
    if (action === 'turn_action') return summaryFirst();

    // Context input
    if (action === 'context_input') {
      const ctx = entry.contextData || {};
      const facts = [];
      if (ctx.currentPage) facts.push(ctx.currentPage.replace(/^\*+:\s*/, ''));
      if (ctx.strugglesCount) facts.push(`${ctx.strugglesCount} struggle signals`);
      if (ctx.sessions) facts.push(`${ctx.sessions} prior sessions`);
      const factLine = facts.join(', ');

      // Look for the learner's own message inside the raw context, if present.
      const raw = entry.rawContext || '';
      const learnerMatch =
        raw.match(/Learner Messages?:\s*(.+?)(?:\n<\/|$)/s) ||
        raw.match(/Recent Chat History\n-\s*User:\s*"(.+?)"/s);
      if (learnerMatch) {
        const prefix = factLine ? factLine + '\n\n' : '';
        return prefix + 'Learner: ' + learnerMatch[1].trim();
      }

      if (facts.length) return factLine;
      return entry.contextSummary || '(scenario input)';
    }
  }

  // Reflection/rewrite entries and learner deliberation prefer `detail`.
  const detailFirstActions = [
    'rewrite',
    'respond_to_critic',
    'profile_learner',
    'profile_tutor',
    'plan',
    'deliberation',
    'response',
  ];
  if (detailFirstActions.includes(action)) return detailFirst();

  // System entries prefer the summary.
  if (agent === 'behavioral_overrides' || agent === 'rejection_budget') {
    return summaryFirst();
  }

  // Fallback
  return entry.detail || entry.contextSummary || entry.content || entry.message || '';
}
|
|
248
|
+
|
|
249
|
+
/**
 * Tell whether an entry is between-turn "intermission" material
 * (self-reflection, disposition, profiling, strategy, or system bookkeeping).
 *
 * @param {Object} entry - Trace entry; only `agent` is inspected.
 * @returns {boolean} True when the entry's agent is a reflection agent.
 */
function isReflectionEntry(entry) {
  switch (entry.agent) {
    case 'ego_self_reflection':
    case 'superego_self_reflection':
    case 'superego_disposition':
    case 'ego_intersubjective':
    case 'behavioral_overrides':
    case 'rejection_budget':
    case 'tutor_other_ego':
    case 'learner_other_ego':
    case 'ego_strategy':
      return true;
    default:
      return false;
  }
}
|
|
260
|
+
|
|
261
|
+
/**
 * Decide whether an entry is shown in compact mode.
 *
 * Visible: learner messages (user turn_action), any revise / generate_final
 * entry, ego drafts not flagged with `_hasRevision`, and superego reviews.
 * Everything else is hidden.
 *
 * @param {Object} entry - Trace entry.
 * @returns {boolean} True when the entry should be rendered.
 */
function isCompactVisible(entry) {
  const { agent, action } = entry;

  switch (action) {
    case 'turn_action':
      return agent === 'user';
    case 'revise':
    case 'generate_final':
      return true;
    case 'generate':
      // A draft is only shown when nothing later replaced it.
      return agent === 'ego' && !entry._hasRevision;
    case 'review':
      return agent === 'superego';
    default:
      return false;
  }
}
|
|
274
|
+
|
|
275
|
+
/**
 * Check if an entry should be shown in messages-only mode.
 *
 * Visible: learner messages (user turn_action), the tutor's delivered output
 * (revise / generate_final, or an ego draft not flagged with `_hasRevision`),
 * and learner_synthesis responses.
 *
 * @param {Object} entry - Trace entry.
 * @returns {boolean} True when the entry is part of the learner↔tutor exchange.
 */
function isMessageVisible(entry) {
  const { agent, action } = entry;
  if (agent === 'user' && action === 'turn_action') return true;
  // generate_final has to be visible here: formatTranscript flags the ego
  // draft as superseded when either a revise or a generate_final follows it,
  // so hiding generate_final would drop the tutor's message from the turn.
  if (action === 'revise' || action === 'generate_final') return true;
  if (agent === 'ego' && action === 'generate' && !entry._hasRevision) return true;
  if (agent === 'learner_synthesis' && action === 'response') return true;
  return false;
}
|
|
286
|
+
|
|
287
|
+
/**
 * Render one trace entry as a block of text.
 *
 * Layout, top to bottom: speaker label, optional metadata subtitle, optional
 * stage direction (never in 'messages-only'), wrapped content, and — in
 * 'full' mode only — a bracketed line with timestamp, generation id and
 * finish reason when present.
 *
 * @param {Object} entry - Trace entry.
 * @param {Object} [options]
 * @param {string} [options.detail='play'] - Detail mode.
 * @returns {string|null} The formatted block, or null when the entry has no
 *   speaker label or has neither content nor a stage direction.
 */
export function formatEntry(entry, options = {}) {
  const { detail = 'play' } = options;

  const speaker = getSpeakerLabel(entry);
  if (!speaker) return null;

  const direction = getStageDirection(entry);
  const content = getEntryContent(entry);
  if (!(content || direction)) return null;

  // Speaker name always leads the block.
  const out = [INDENT + speaker];

  // Metadata subtitle (model, tokens, time, cost).
  const metaLine = buildMetadataLine(entry, detail);
  if (metaLine) out.push(INDENT + ' ' + metaLine);

  if (direction && detail !== 'messages-only') {
    out.push(ASIDE_INDENT + direction);
  }

  // Content sits under the stage direction when there is one.
  if (content) {
    out.push(wrapText(content, direction ? ASIDE_INDENT : INDENT, DEFAULT_WIDTH));
  }

  if (detail === 'full') {
    const { timestamp, metrics } = entry;
    const extra = [
      timestamp && `time=${timestamp}`,
      metrics?.generationId && `gen=${metrics.generationId}`,
      metrics?.finishReason && `finish=${metrics.finishReason}`,
    ].filter(Boolean);
    if (extra.length > 0) out.push(ASIDE_INDENT + `[${extra.join(', ')}]`);
  }

  return out.join('\n');
}
|
|
334
|
+
|
|
335
|
+
/**
 * Render a consolidated trace as a play-style transcript.
 *
 * Entries are grouped by `turnIndex` (missing → 0) into numbered ACTs in
 * ascending order. Within an act, regular entries come first and reflection
 * entries follow as an intermission. 'bilateral' mode is delegated to
 * formatBilateralTranscript.
 *
 * @param {Array} trace - The consolidatedTrace array
 * @param {Object} [options]
 * @param {string} [options.detail='play'] - 'play' | 'compact' | 'messages-only' | 'full' | 'bilateral'
 * @param {string} [options.scenarioName=''] - Scenario title for the header
 * @param {string} [options.profileName=''] - Cell/profile name
 * @param {number} [options.totalTurns=0] - Total number of dialogue turns
 * @returns {string} Formatted transcript text; '(empty trace)\n' for a missing or empty trace
 */
export function formatTranscript(trace, options = {}) {
  const { detail = 'play', scenarioName = '', profileName = '', totalTurns = 0 } = options;

  if (!trace || trace.length === 0) return '(empty trace)\n';
  if (detail === 'bilateral') return formatBilateralTranscript(trace, options);

  // True for an ego revise/generate_final entry in the same turn as `draft`.
  const supersedes = (draft) => (candidate) =>
    candidate.turnIndex === draft.turnIndex &&
    candidate.agent === 'ego' &&
    (candidate.action === 'revise' || candidate.action === 'generate_final');

  // Work on shallow copies; ego drafts are flagged when a later entry in the
  // same turn replaces them.
  const processed = trace.map((entry, i) => {
    if (entry.agent !== 'ego' || entry.action !== 'generate') return { ...entry };
    return { ...entry, _hasRevision: trace.slice(i + 1).some(supersedes(entry)) };
  });

  const center = (text) => {
    const padding = Math.max(0, Math.floor((DEFAULT_WIDTH - text.length) / 2));
    return ' '.repeat(padding) + text;
  };

  const out = [];

  // Header
  if (scenarioName || profileName) {
    out.push('');
    if (scenarioName) {
      const title = scenarioName.toUpperCase();
      out.push(center(totalTurns > 0 ? `${title} (${totalTurns}-turn)` : title));
    }
    if (profileName) out.push(center(profileName));
    out.push(center('\u2500'.repeat(Math.min(DEFAULT_WIDTH - 10, 40))), '');
  }

  // Bucket entries by turn, preserving their order inside each bucket.
  const byTurn = new Map();
  for (const entry of processed) {
    const key = entry.turnIndex ?? 0;
    const bucket = byTurn.get(key);
    if (bucket) {
      bucket.push(entry);
    } else {
      byTurn.set(key, [entry]);
    }
  }
  const turnOrder = [...byTurn.keys()].sort((a, b) => a - b);

  for (const turnIdx of turnOrder) {
    // ACT header
    out.push('', center(`ACT ${turnIdx + 1}`), '');

    const mainEntries = [];
    const reflections = [];
    for (const entry of byTurn.get(turnIdx)) {
      (isReflectionEntry(entry) ? reflections : mainEntries).push(entry);
    }

    for (const entry of mainEntries) {
      const hidden =
        (detail === 'compact' && !isCompactVisible(entry)) ||
        (detail === 'messages-only' && !isMessageVisible(entry));
      if (hidden) continue;

      const formatted = formatEntry(entry, { detail });
      if (formatted) out.push(formatted, '');
    }

    // Reflections (intermission) never appear in messages-only mode.
    if (reflections.length === 0 || detail === 'messages-only') continue;

    if (detail === 'compact') {
      // One-liner per reflection, no intermission banner.
      for (const entry of reflections) {
        const speaker = getSpeakerLabel(entry);
        const summary = (entry.contextSummary || entry.detail || '').substring(0, 80);
        if (speaker && summary) out.push(`${INDENT}[${speaker}] ${summary}`, '');
      }
      continue;
    }

    out.push(center('~~~ intermission ~~~'), '');
    for (const entry of reflections) {
      const formatted = formatEntry(entry, { detail });
      if (formatted) out.push(formatted, '');
    }
  }

  return out.join('\n');
}
|
|
452
|
+
|
|
453
|
+
/**
 * Tell whether a trace entry belongs to the tutor phase (as opposed to the
 * learner phase) of a dialogue turn.
 *
 * Tutor phase: the tutor's ego/superego agents and their reflection, profiling,
 * strategy and bookkeeping agents; 'system' entries; and the user
 * context_input / final_output bookends.
 *
 * @param {Object} entry - Trace entry with `agent` and `action`.
 * @returns {boolean} True for tutor-phase entries.
 */
function isTutorEntry(entry) {
  const { agent, action } = entry;

  switch (agent) {
    case 'ego':
    case 'superego':
    case 'ego_self_reflection':
    case 'superego_self_reflection':
    case 'superego_disposition':
    case 'ego_intersubjective':
    case 'tutor_other_ego':
    case 'ego_strategy':
    case 'behavioral_overrides':
    case 'rejection_budget':
    case 'system':
      return true;
    case 'user':
      // Only the bookends around tutor deliberation count as tutor-phase.
      return action === 'context_input' || action === 'final_output';
    default:
      return false;
  }
}
|
|
469
|
+
|
|
470
|
+
/**
 * Format a bilateral transcript: splits the trace into dialogue turns and
 * shows tutor and learner deliberation sequentially within each turn.
 *
 * A dialogue turn ends at each user `turn_action` entry; entries after the
 * last `turn_action` form a trailing turn. Within a turn, entries are sorted
 * into phases and printed in this order:
 *
 *   TURN 1
 *     ── TUTOR DELIBERATION ──
 *       tutor-phase entries (final_output markers are skipped, and
 *       context_input is only shown in the first turn)
 *     ── LEARNER DELIBERATION ──
 *       remaining learner entries, minus learner_synthesis responses
 *     ── LEARNER MESSAGE ──
 *       the turn_action entry
 *     ~~~ intermission ~~~
 *       reflection entries
 *   TURN 2
 *     ...
 *
 * Every entry is rendered with formatEntry in 'play' detail.
 *
 * @param {Array} trace - The consolidatedTrace array.
 * @param {Object} [options]
 * @param {string} [options.scenarioName=''] - Scenario title for the header.
 * @param {string} [options.profileName=''] - Cell/profile name.
 * @param {number} [options.totalTurns=0] - Turn count shown in the title.
 * @returns {string} Formatted transcript text.
 */
function formatBilateralTranscript(trace, options = {}) {
  const { scenarioName = '', profileName = '', totalTurns = 0 } = options;

  // Split into dialogue turns: a turn_action closes the current turn.
  // (No `_hasRevision` pre-pass here: 'play' rendering never reads that flag.)
  const dialogueTurns = [];
  let currentEntries = [];
  for (const entry of trace) {
    currentEntries.push(entry);
    if (entry.agent === 'user' && entry.action === 'turn_action') {
      dialogueTurns.push(currentEntries);
      currentEntries = [];
    }
  }
  // Trailing tutor deliberation with no learner response.
  if (currentEntries.length > 0) {
    dialogueTurns.push(currentEntries);
  }

  const lines = [];

  const center = (text) => {
    const pad = Math.max(0, Math.floor((DEFAULT_WIDTH - text.length) / 2));
    return ' '.repeat(pad) + text;
  };

  // Append one rendered entry followed by a blank separator line.
  const emit = (entry) => {
    const formatted = formatEntry(entry, { detail: 'play' });
    if (formatted) lines.push(formatted, '');
  };

  // Header
  if (scenarioName || profileName) {
    lines.push('');
    if (scenarioName) {
      const title = scenarioName.toUpperCase();
      lines.push(center(totalTurns > 0 ? `${title} (${totalTurns}-turn)` : title));
    }
    if (profileName) lines.push(center(profileName));
    lines.push(center('\u2500'.repeat(Math.min(DEFAULT_WIDTH - 10, 40))));
    lines.push('');
  }

  const PHASE_LINE = '\u2500'.repeat(30);

  dialogueTurns.forEach((entries, turnNum) => {
    // Turn header
    lines.push('', center(`TURN ${turnNum + 1}`), '');

    // Sort entries into phases; reflections take priority over phase membership.
    const tutorEntries = [];
    const learnerDeliberation = [];
    const reflections = [];
    let learnerMessage = null;

    for (const entry of entries) {
      if (isReflectionEntry(entry)) {
        reflections.push(entry);
      } else if (entry.agent === 'user' && entry.action === 'turn_action') {
        learnerMessage = entry;
      } else if (isTutorEntry(entry)) {
        tutorEntries.push(entry);
      } else {
        learnerDeliberation.push(entry);
      }
    }

    // ── TUTOR DELIBERATION ──
    if (tutorEntries.length > 0) {
      lines.push(INDENT + `\u2500\u2500 TUTOR DELIBERATION ${PHASE_LINE}`, '');
      for (const entry of tutorEntries) {
        if (entry.agent === 'user') {
          // final_output markers are structural, not content.
          if (entry.action === 'final_output') continue;
          // context_input after the first turn repeats the same scenario data.
          if (entry.action === 'context_input' && turnNum > 0) continue;
        }
        emit(entry);
      }
    }

    // ── LEARNER DELIBERATION ──
    // learner_synthesis is skipped: it duplicates the turn_action content
    // shown under LEARNER MESSAGE.
    const deliberationOnly = learnerDeliberation.filter(
      (e) => !(e.agent === 'learner_synthesis' && e.action === 'response')
    );
    if (deliberationOnly.length > 0) {
      lines.push(INDENT + `\u2500\u2500 LEARNER DELIBERATION ${PHASE_LINE}`, '');
      deliberationOnly.forEach(emit);
    }

    // ── LEARNER MESSAGE ── (the external turn_action)
    if (learnerMessage) {
      lines.push(INDENT + `\u2500\u2500 LEARNER MESSAGE ${PHASE_LINE}`, '');
      emit(learnerMessage);
    }

    // Between-turn reflections (intermission)
    if (reflections.length > 0) {
      lines.push(center('~~~ intermission ~~~'), '');
      reflections.forEach(emit);
    }
  });

  return lines.join('\n');
}
|
|
636
|
+
|
|
637
|
+
/**
 * Format a single entry for incremental/streaming output (one line per event).
 * Used for live console output during runs.
 *
 * Produces a line for learner messages, superego reviews, the tutor's
 * delivered output (revise, generate_final, or an ego draft not flagged with
 * `_hasRevision`), and reflection entries. All other entries yield null.
 *
 * @param {Object} entry - Trace entry.
 * @returns {string|null} The one-line summary, or null when the entry is not shown.
 */
export function formatCompactLine(entry) {
  const speaker = getSpeakerLabel(entry);
  if (!speaker) return null;

  const { agent, action } = entry;
  const meta = buildMetadataLine(entry, 'compact');
  const metaSuffix = meta ? ` (${meta})` : '';

  // Learner message
  if (agent === 'user' && action === 'turn_action') {
    const msg = (entry.contextSummary || entry.detail || '').substring(0, 120);
    return ` [LEARNER] ${msg}`;
  }

  // Superego review
  if (agent === 'superego' && action === 'review') {
    const verdict = entry.approved ? 'APPROVED' : 'REVISE';
    const feedback = (entry.feedback || entry.verdict?.feedback || '').substring(0, 80);
    return ` [SUPEREGO ${verdict}]${metaSuffix} ${feedback}`;
  }

  // Final tutor output. generate_final is included so that it is reported
  // like revise, matching what compact transcripts treat as visible.
  const isTutorOutput =
    action === 'revise' ||
    action === 'generate_final' ||
    (agent === 'ego' && action === 'generate' && !entry._hasRevision);
  if (isTutorOutput) {
    const msg = (entry.suggestions || [])
      .map((s) => (s.message || s.title || '').substring(0, 80))
      .join('; ');
    return ` [TUTOR]${metaSuffix} ${msg}`;
  }

  // Reflections (compact one-liner). Falls back to `detail` when there is no
  // summary, as the compact branch of formatTranscript does.
  if (isReflectionEntry(entry)) {
    const summary = (entry.contextSummary || entry.detail || '').substring(0, 80);
    return ` [${speaker}] ${summary}`;
  }

  return null;
}
|