@machinespirits/eval 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68):
  1. package/components/MobileEvalDashboard.tsx +267 -0
  2. package/components/comparison/DeltaAnalysisTable.tsx +137 -0
  3. package/components/comparison/ProfileComparisonCard.tsx +176 -0
  4. package/components/comparison/RecognitionABMode.tsx +385 -0
  5. package/components/comparison/RecognitionMetricsPanel.tsx +135 -0
  6. package/components/comparison/WinnerIndicator.tsx +64 -0
  7. package/components/comparison/index.ts +5 -0
  8. package/components/mobile/BottomSheet.tsx +233 -0
  9. package/components/mobile/DimensionBreakdown.tsx +210 -0
  10. package/components/mobile/DocsView.tsx +363 -0
  11. package/components/mobile/LogsView.tsx +481 -0
  12. package/components/mobile/PsychodynamicQuadrant.tsx +261 -0
  13. package/components/mobile/QuickTestView.tsx +1098 -0
  14. package/components/mobile/RecognitionTypeChart.tsx +124 -0
  15. package/components/mobile/RecognitionView.tsx +809 -0
  16. package/components/mobile/RunDetailView.tsx +261 -0
  17. package/components/mobile/RunHistoryView.tsx +367 -0
  18. package/components/mobile/ScoreRadial.tsx +211 -0
  19. package/components/mobile/StreamingLogPanel.tsx +230 -0
  20. package/components/mobile/SynthesisStrategyChart.tsx +140 -0
  21. package/config/interaction-eval-scenarios.yaml +832 -0
  22. package/config/learner-agents.yaml +248 -0
  23. package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +52 -0
  24. package/docs/research/ABLATION-MODEL-SELECTION.md +53 -0
  25. package/docs/research/ADVANCED-EVAL-ANALYSIS.md +60 -0
  26. package/docs/research/ANOVA-RESULTS-2026-01-14.md +257 -0
  27. package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +586 -0
  28. package/docs/research/COST-ANALYSIS.md +56 -0
  29. package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +340 -0
  30. package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +291 -0
  31. package/docs/research/EVAL-SYSTEM-ANALYSIS.md +306 -0
  32. package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +301 -0
  33. package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +1988 -0
  34. package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +282 -0
  35. package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +147 -0
  36. package/docs/research/PAPER-EXTENSION-DYADIC.md +204 -0
  37. package/docs/research/PAPER-UNIFIED.md +659 -0
  38. package/docs/research/PAPER-UNIFIED.pdf +0 -0
  39. package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +356 -0
  40. package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +419 -0
  41. package/docs/research/apa.csl +2133 -0
  42. package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +1637 -0
  43. package/docs/research/archive/paper-multiagent-tutor.tex +978 -0
  44. package/docs/research/paper-draft/full-paper.md +136 -0
  45. package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
  46. package/docs/research/paper-draft/references.bib +515 -0
  47. package/docs/research/transcript-baseline.md +139 -0
  48. package/docs/research/transcript-recognition-multiagent.md +187 -0
  49. package/hooks/useEvalData.ts +625 -0
  50. package/index.js +27 -0
  51. package/package.json +73 -0
  52. package/routes/evalRoutes.js +3002 -0
  53. package/scripts/advanced-eval-analysis.js +351 -0
  54. package/scripts/analyze-eval-costs.js +378 -0
  55. package/scripts/analyze-eval-results.js +513 -0
  56. package/scripts/analyze-interaction-evals.js +368 -0
  57. package/server-init.js +45 -0
  58. package/server.js +162 -0
  59. package/services/benchmarkService.js +1892 -0
  60. package/services/evaluationRunner.js +739 -0
  61. package/services/evaluationStore.js +1121 -0
  62. package/services/learnerConfigLoader.js +385 -0
  63. package/services/learnerTutorInteractionEngine.js +857 -0
  64. package/services/memory/learnerMemoryService.js +1227 -0
  65. package/services/memory/learnerWritingPad.js +577 -0
  66. package/services/memory/tutorWritingPad.js +674 -0
  67. package/services/promptRecommendationService.js +493 -0
  68. package/services/rubricEvaluator.js +826 -0
@@ -0,0 +1,857 @@
1
+ /**
2
+ * Learner-Tutor Interaction Engine
3
+ *
4
+ * Orchestrates multi-turn interactions between synthetic learner agents
5
+ * and tutor agents for evaluation purposes. Tracks both internal deliberation
6
+ * and external dialogue, with hooks for judge evaluation.
7
+ */
8
+
9
+ import * as learnerConfig from './learnerConfigLoader.js';
10
+ import { tutorConfigLoader as tutorConfig } from '@machinespirits/tutor-core';
11
+
12
+ import * as learnerWritingPad from './memory/learnerWritingPad.js';
13
+ import * as tutorWritingPad from './memory/tutorWritingPad.js';
14
+
15
+ // ============================================================================
16
+ // Interaction Engine Configuration
17
+ // ============================================================================
18
+
19
/** Number of tutor/learner rounds to run when `options.maxTurns` is not supplied. */
const DEFAULT_MAX_TURNS = 10;

/**
 * Outcome labels that can be attached to an interaction trace.
 *
 * Frozen so that a consumer cannot accidentally add, remove, or rewrite a
 * label on this shared lookup table at runtime.
 */
const INTERACTION_OUTCOMES = Object.freeze({
  BREAKTHROUGH: 'breakthrough', // Learner shows genuine understanding
  PRODUCTIVE_STRUGGLE: 'productive_struggle', // Healthy confusion/effort
  MUTUAL_RECOGNITION: 'mutual_recognition', // Both parties recognize each other
  FRUSTRATION: 'frustration', // Learner becomes frustrated
  DISENGAGEMENT: 'disengagement', // Learner disengages
  SCAFFOLDING_NEEDED: 'scaffolding_needed', // Learner needs more support
  FADING_APPROPRIATE: 'fading_appropriate', // Ready for less support
  TRANSFORMATION: 'transformation', // Conceptual restructuring occurred
});
32
+
33
+ // ============================================================================
34
+ // Main Interaction Function
35
+ // ============================================================================
36
+
37
/**
 * Drive one complete learner/tutor dialogue and return the recorded trace.
 *
 * The learner speaks first (recorded as turn 0). Each round then runs a tutor
 * turn followed by a learner turn, updating both writing pads as it goes. The
 * dialogue stops when `maxTurns` rounds have run, when the tutor's reply sets
 * `suggestsEnding`, or when the learner's reply sets `suggestsEnding` or
 * reports the emotional state `'disengaged'`.
 *
 * @param {Object} config - Interaction configuration
 * @param {string} config.learnerId - Unique learner identifier
 * @param {string} [config.personaId='productive_struggler'] - Learner persona id
 * @param {string} [config.tutorProfile='default'] - Tutor profile name
 * @param {string} config.topic - Topic to discuss
 * @param {Object} [config.scenario] - Scenario configuration
 * @param {string} [config.sessionId] - Session id; defaults to `session-<timestamp>`
 * @param {Function} llmCall - Async function used for every LLM request
 * @param {Object} [options] - Additional options
 * @param {number} [options.maxTurns=DEFAULT_MAX_TURNS] - Maximum number of rounds
 * @param {boolean} [options.observeInternals=true] - Whether internal deliberation
 *   is kept in the conversation history handed to the turn functions
 * @param {string} [options.learnerProfile] - Passed to `learnerConfig.getActiveProfile`
 * @returns {Promise<Object>} The interaction trace (turns, outcomes, metrics,
 *   writing-pad snapshots, summary)
 *
 * NOTE(review): `options.trace` is accepted by callers but is not read here, and
 * `observeInternals` does not affect the `turns` entries of the returned trace —
 * confirm whether either is intentional.
 */
export async function runInteraction(config, llmCall, options = {}) {
  const {
    learnerId,
    personaId = 'productive_struggler',
    tutorProfile = 'default',
    topic,
    scenario,
    sessionId = `session-${Date.now()}`,
  } = config;
  const { maxTurns = DEFAULT_MAX_TURNS, observeInternals = true } = options;

  const startedAt = Date.now();

  // Everything collected during the run hangs off this single object.
  const interactionTrace = {
    id: `interaction-${Date.now()}`,
    learnerId,
    personaId,
    tutorProfile,
    topic,
    sessionId,
    turns: [],
    outcomes: [],
    metrics: {
      totalInputTokens: 0,
      totalOutputTokens: 0,
      learnerInputTokens: 0,
      learnerOutputTokens: 0,
      tutorInputTokens: 0,
      tutorOutputTokens: 0,
    },
    writingPadSnapshots: {
      learner: { before: null, after: null },
      tutor: { before: null, after: null },
    },
  };

  // Resolve the learner side of the configuration.
  const learnerPersona = learnerConfig.getPersona(personaId);
  const learnerProfile = learnerConfig.getActiveProfile(options.learnerProfile);
  const learnerArchitecture = learnerProfile.architecture || learnerPersona.default_architecture || 'unified';

  // Capture both writing pads before any turn has touched them.
  const padSnapshots = interactionTrace.writingPadSnapshots;
  padSnapshots.learner.before = learnerWritingPad.createSnapshot(learnerId);
  padSnapshots.tutor.before = tutorWritingPad.createSnapshot(learnerId);

  const conversationHistory = [];

  // Append one speaker's reply to the running history.
  const addToHistory = (role, reply) => {
    conversationHistory.push({
      role,
      content: reply.externalMessage,
      internalDeliberation: observeInternals ? reply.internalDeliberation : null,
    });
  };

  // Append a learner-phase entry to the trace.
  const recordLearnerTurn = (turnNumber, reply) => {
    interactionTrace.turns.push({
      turnNumber,
      phase: 'learner',
      externalMessage: reply.externalMessage,
      internalDeliberation: reply.internalDeliberation,
      emotionalState: reply.emotionalState,
      understandingLevel: reply.understandingLevel,
      timestamp: new Date().toISOString(),
    });
  };

  // The learner opens the conversation; this is recorded as turn 0.
  let currentLearnerMessage = await generateInitialLearnerMessage(
    learnerPersona,
    learnerArchitecture,
    learnerProfile,
    scenario,
    topic,
    llmCall,
    interactionTrace
  );
  addToHistory('learner', currentLearnerMessage);
  recordLearnerTurn(0, currentLearnerMessage);

  let turnCount = 0;
  while (turnCount < maxTurns) {
    turnCount += 1;

    // ---- Tutor speaks ----
    const tutorResponse = await runTutorTurn(
      learnerId,
      sessionId,
      currentLearnerMessage.externalMessage,
      conversationHistory,
      tutorProfile,
      topic,
      llmCall,
      interactionTrace
    );
    addToHistory('tutor', tutorResponse);
    interactionTrace.turns.push({
      turnNumber: turnCount,
      phase: 'tutor',
      externalMessage: tutorResponse.externalMessage,
      internalDeliberation: tutorResponse.internalDeliberation,
      strategy: tutorResponse.strategy,
      timestamp: new Date().toISOString(),
    });
    await updateTutorWritingPad(learnerId, sessionId, tutorResponse, currentLearnerMessage);

    if (tutorResponse.suggestsEnding) {
      break;
    }

    // ---- Learner replies ----
    const learnerResponse = await runLearnerTurn(
      learnerId,
      sessionId,
      learnerPersona,
      learnerArchitecture,
      learnerProfile,
      tutorResponse.externalMessage,
      conversationHistory,
      topic,
      llmCall,
      interactionTrace
    );
    addToHistory('learner', learnerResponse);
    recordLearnerTurn(turnCount, learnerResponse);
    await updateLearnerWritingPad(learnerId, sessionId, learnerResponse, tutorResponse, topic);

    interactionTrace.outcomes.push(...detectTurnOutcomes(learnerResponse, tutorResponse));

    const learnerIsDone = learnerResponse.suggestsEnding || learnerResponse.emotionalState === 'disengaged';
    if (learnerIsDone) {
      break;
    }

    currentLearnerMessage = learnerResponse;
  }

  // Capture both writing pads again now that the dialogue is over.
  padSnapshots.learner.after = learnerWritingPad.createSnapshot(learnerId);
  padSnapshots.tutor.after = tutorWritingPad.createSnapshot(learnerId);

  interactionTrace.metrics.totalLatencyMs = Date.now() - startedAt;
  interactionTrace.metrics.turnCount = turnCount;
  interactionTrace.summary = generateInteractionSummary(interactionTrace);

  return interactionTrace;
}
232
+
233
+ // ============================================================================
234
+ // Learner Turn Implementation
235
+ // ============================================================================
236
+
237
/**
 * Produce the learner's opening message for an interaction.
 *
 * Each agent role configured for the profile contributes an internal voice via
 * its own LLM call; a `superego` role that runs after at least one other voice
 * is shown the earlier voices and asked to critique them. A final LLM call then
 * either adapts `scenario.learnerOpening` (when it is a non-blank string) or
 * synthesizes a fresh first message about `topic`.
 *
 * Token usage from every call is added to `trace.metrics.learnerInputTokens`
 * and `trace.metrics.learnerOutputTokens`.
 *
 * @param {Object} persona - Learner persona (forwarded to `buildLearnerPrompt`)
 * @param {string} architecture - Not read here; roles come from the profile
 * @param {Object} profile - Learner profile; `profile.name` selects roles and configs
 * @param {Object} [scenario] - Optional scenario (`name`, `learnerStartState`, `learnerOpening`)
 * @param {string} topic - Topic under discussion
 * @param {Function} llmCall - Async `(model, systemPrompt, messages, options)` LLM function
 * @param {Object} trace - Interaction trace whose `metrics` are updated in place
 * @returns {Promise<{externalMessage: string, internalDeliberation: Array<{role: string, content: string}>, emotionalState: string, understandingLevel: string}>}
 */
async function generateInitialLearnerMessage(persona, architecture, profile, scenario, topic, llmCall, trace) {
  // Get agent roles from profile (not architecture)
  const agentRoles = learnerConfig.getProfileAgentRoles(profile.name);
  const internalDeliberation = [];

  // Render the voices collected so far as "ROLE: text" paragraphs.
  const renderDeliberation = () => internalDeliberation
    .map(d => `${d.role.toUpperCase()}: ${d.content}`)
    .join('\n\n');

  // Run internal deliberation for each agent in the profile
  // For ego/superego pattern: superego sees and critiques ego's initial response
  for (const role of agentRoles) {
    const agentConfig = learnerConfig.getAgentConfig(role, profile.name);
    if (!agentConfig) continue;

    // Build context based on role
    let roleContext = `
Topic: ${topic}
Scenario: ${scenario?.name || 'General learning'}
Initial state: ${scenario?.learnerStartState || 'Beginning new topic'}`;

    // If this is superego and we have prior deliberation (ego), include it for critique
    if (role === 'superego' && internalDeliberation.length > 0) {
      roleContext += `

The EGO's initial reaction was:
${renderDeliberation()}

Review the EGO's first impression. Is it too superficial? What's being avoided? What would lead to genuine learning?`;
    } else {
      roleContext += `

Generate this agent's internal voice as the learner approaches this topic for the first time.`;
    }

    const prompt = buildLearnerPrompt(agentConfig, persona, roleContext);
    const userInstruction = role === 'superego' ? 'Critique the EGO\'s initial reaction.' : 'Generate your internal voice.';

    const response = await llmCall(agentConfig.model, prompt, [{ role: 'user', content: userInstruction }], {
      // `??` rather than `||`: a configured temperature of 0 must be honoured.
      temperature: agentConfig.hyperparameters?.temperature ?? 0.7,
      maxTokens: agentConfig.hyperparameters?.max_tokens || 200,
    });

    internalDeliberation.push({
      role,
      // An empty completion is stored as '' so later string handling cannot throw.
      content: response.content || '',
    });

    trace.metrics.learnerInputTokens += response.usage?.inputTokens || 0;
    trace.metrics.learnerOutputTokens += response.usage?.outputTokens || 0;
  }

  // Synthesize external message
  const synthesisConfig = learnerConfig.getSynthesisConfig(profile.name);

  // If the scenario provides an opening message, ask the model to adapt it lightly;
  // otherwise synthesize from the internal deliberation alone.
  const hasOpeningMessage = scenario?.learnerOpening && scenario.learnerOpening.trim().length > 0;

  const synthesisPrompt = hasOpeningMessage
    ? `You are simulating a learner with these internal voices:

${renderDeliberation()}

The learner wants to open with this message: "${scenario.learnerOpening}"

Lightly adapt this opening to feel natural given the internal deliberation, but keep the core content and question intact.
The adapted message should be 1-3 sentences and maintain the original meaning.`
    : `You are simulating a learner with these internal voices:

${renderDeliberation()}

Synthesize these into a realistic first message to a tutor about: ${topic}

The message should feel authentic - not too polished, showing real confusion or interest.
Keep it 1-3 sentences.`;

  const synthModel = synthesisConfig?.model || resolveProfileModel(profile);
  const externalResponse = await llmCall(synthModel, synthesisPrompt, [{ role: 'user', content: 'Generate the learner\'s opening message.' }], {
    temperature: synthesisConfig?.hyperparameters?.temperature ?? 0.7,
    maxTokens: synthesisConfig?.hyperparameters?.max_tokens || 200,
  });

  trace.metrics.learnerInputTokens += externalResponse.usage?.inputTokens || 0;
  trace.metrics.learnerOutputTokens += externalResponse.usage?.outputTokens || 0;

  // If synthesis came back empty, fall back to the scripted opening (when there
  // is one) instead of handing the tutor an undefined message.
  const externalMessage = externalResponse.content || (hasOpeningMessage ? scenario.learnerOpening : '');

  return {
    externalMessage,
    internalDeliberation,
    emotionalState: detectEmotionalState(internalDeliberation),
    understandingLevel: 'initial',
  };
}
331
+
332
/**
 * Assemble the system prompt for one learner agent.
 *
 * Starts from `agentConfig.prompt` (or an empty string), then appends the
 * persona's `prompt_modifier` and `additionalContext`, each separated from the
 * preceding text by a blank line, skipping any part that is missing or empty.
 *
 * A missing `agentConfig` or `persona` is tolerated: the corresponding part is
 * simply left out instead of raising a TypeError.
 *
 * @param {Object} [agentConfig] - Agent configuration; only `prompt` is read
 * @param {Object} [persona] - Learner persona; only `prompt_modifier` is read
 * @param {string} [additionalContext] - Extra context appended last
 * @returns {string} The combined prompt
 */
function buildLearnerPrompt(agentConfig, persona, additionalContext) {
  let prompt = agentConfig?.prompt || '';

  // Add persona context
  if (persona?.prompt_modifier) {
    prompt += `\n\n${persona.prompt_modifier}`;
  }

  // Add additional context
  if (additionalContext) {
    prompt += `\n\n${additionalContext}`;
  }

  return prompt;
}
350
+
351
/**
 * Resolve the model identifier to use for a learner profile.
 *
 * Looks up the provider configuration named by `profile.provider` (default
 * `'openrouter'`) and maps the alias in `profile.model` (default `'nemotron'`)
 * through that provider's `models` table. When the provider configuration or
 * the alias entry is missing, the alias itself is returned.
 *
 * @param {Object} profile - Learner profile; `provider` and `model` are read
 * @returns {string} The mapped model value, or the alias when no mapping exists
 */
function resolveProfileModel(profile) {
  const providerConfig = learnerConfig.getProviderConfig(profile.provider || 'openrouter');
  const modelAlias = profile.model || 'nemotron';
  // Guard the provider lookup as well, so an unknown provider falls back to the
  // alias rather than throwing on `undefined.models`.
  return providerConfig?.models?.[modelAlias] || modelAlias;
}
359
+
360
/**
 * Produce the learner's reply to the tutor's latest message.
 *
 * Each agent role configured for the profile reacts through its own LLM call,
 * given the learner's narrative memory and the last six history entries; a
 * `superego` role that runs after at least one other voice is shown the earlier
 * voices and asked to critique them. Emotional state and understanding level
 * are derived from the collected voices, and a final LLM call synthesizes the
 * outward response.
 *
 * Token usage from every call is added to `trace.metrics.learnerInputTokens`
 * and `trace.metrics.learnerOutputTokens`.
 *
 * @param {string} learnerId - Learner identifier (used to read the writing pad)
 * @param {string} sessionId - Session identifier
 * @param {Object} persona - Learner persona (`name`, `description` are read here)
 * @param {string} architecture - Not read here; roles come from the profile
 * @param {Object} profile - Learner profile; `profile.name` selects roles and configs
 * @param {string} tutorMessage - The tutor's latest external message
 * @param {Array<{role: string, content: string}>} history - Conversation so far
 * @param {string} topic - Topic under discussion
 * @param {Function} llmCall - Async `(model, systemPrompt, messages, options)` LLM function
 * @param {Object} trace - Interaction trace whose `metrics` are updated in place
 * @returns {Promise<{externalMessage: string, internalDeliberation: Array<{role: string, content: string}>, emotionalState: string, understandingLevel: string, suggestsEnding: boolean}>}
 */
async function runLearnerTurn(learnerId, sessionId, persona, architecture, profile, tutorMessage, history, topic, llmCall, trace) {
  // Get agent roles from profile (not architecture)
  const agentRoles = learnerConfig.getProfileAgentRoles(profile.name);
  const internalDeliberation = [];

  // Render the voices collected so far as "ROLE: text" paragraphs.
  const renderDeliberation = () => internalDeliberation
    .map(d => `${d.role.toUpperCase()}: ${d.content}`)
    .join('\n\n');

  // Get current learner memory state
  const learnerMemory = learnerWritingPad.buildNarrativeSummary(learnerId, sessionId);

  // Build conversation context from the six most recent history entries
  const conversationContext = history
    .slice(-6)
    .map(m => `${m.role.toUpperCase()}: ${m.content}`)
    .join('\n\n');

  // Run internal deliberation for each agent
  // For ego/superego pattern: superego sees and critiques ego's response
  for (const role of agentRoles) {
    const agentConfig = learnerConfig.getAgentConfig(role, profile.name);
    if (!agentConfig) continue;

    // Build context based on role
    let roleContext = `
Topic: ${topic}

Your memory and state:
${learnerMemory}

Recent conversation:
${conversationContext}

The tutor just said:
"${tutorMessage}"`;

    // If this is superego and we have prior deliberation (ego), include it for critique
    if (role === 'superego' && internalDeliberation.length > 0) {
      roleContext += `

The EGO's initial reaction was:
${renderDeliberation()}

Review the EGO's response. Is it accurate? What's being missed or glossed over? What should we really ask or admit?`;
    } else {
      roleContext += `

Generate your internal reaction as this dimension of the learner's experience.`;
    }

    const prompt = buildLearnerPrompt(agentConfig, persona, roleContext);
    const userInstruction = role === 'superego' ? 'Critique the EGO\'s reaction.' : 'React to the tutor\'s message.';

    const response = await llmCall(agentConfig.model, prompt, [{ role: 'user', content: userInstruction }], {
      // `??` rather than `||`: a configured temperature of 0 must be honoured.
      temperature: agentConfig.hyperparameters?.temperature ?? 0.7,
      maxTokens: agentConfig.hyperparameters?.max_tokens || 200,
    });

    internalDeliberation.push({
      role,
      // An empty completion is stored as '' so later string handling cannot throw.
      content: response.content || '',
    });

    trace.metrics.learnerInputTokens += response.usage?.inputTokens || 0;
    trace.metrics.learnerOutputTokens += response.usage?.outputTokens || 0;
  }

  // Synthesize external response
  const emotionalState = detectEmotionalState(internalDeliberation);
  const understandingLevel = detectUnderstandingLevel(internalDeliberation);

  const synthesisConfig = learnerConfig.getSynthesisConfig(profile.name);
  const synthesisPrompt = `You are simulating a ${persona.name} learner with these internal reactions:

${renderDeliberation()}

Current emotional state: ${emotionalState}
Current understanding: ${understandingLevel}

The tutor just said: "${tutorMessage}"

Synthesize these internal reactions into a realistic response. The learner should:
- Show their genuine reaction (confusion, interest, frustration, insight)
- Ask follow-up questions if naturally arising
- Not be too polished or articulate if they're genuinely confused
- Match the persona: ${persona.description}

Keep response to 1-4 sentences. Be authentic.`;

  const synthModel = synthesisConfig?.model || resolveProfileModel(profile);
  const externalResponse = await llmCall(synthModel, synthesisPrompt, [{ role: 'user', content: 'Generate the learner\'s response.' }], {
    temperature: synthesisConfig?.hyperparameters?.temperature ?? 0.7,
    maxTokens: synthesisConfig?.hyperparameters?.max_tokens || 250,
  });

  trace.metrics.learnerInputTokens += externalResponse.usage?.inputTokens || 0;
  trace.metrics.learnerOutputTokens += externalResponse.usage?.outputTokens || 0;

  return {
    // Always a string, so callers can slice or lower-case it safely.
    externalMessage: externalResponse.content || '',
    internalDeliberation,
    emotionalState,
    understandingLevel,
    suggestsEnding: emotionalState === 'satisfied' || emotionalState === 'disengaged',
  };
}
468
+
469
+ // ============================================================================
470
+ // Tutor Turn Implementation
471
+ // ============================================================================
472
+
473
/**
 * Produce the tutor's reply to the learner's latest message.
 *
 * Runs two LLM calls: the tutor "ego" drafts a response, then the "superego"
 * critiques it and may supply an `IMPROVED:` version. The improved text
 * replaces the draft only when it is longer than 20 characters and is not an
 * approval verdict. The result is passed through `extractTutorMessage`, and a
 * canned acknowledgment is substituted if the message is still empty.
 *
 * Token usage from both calls is added to `trace.metrics.tutorInputTokens` and
 * `trace.metrics.tutorOutputTokens`.
 *
 * @param {string} learnerId - Learner identifier (used to read the tutor writing pad)
 * @param {string} sessionId - Session identifier
 * @param {string} learnerMessage - The learner's latest external message
 * @param {Array<{role: string, content: string}>} history - Conversation so far
 * @param {string} tutorProfileName - Tutor profile used to look up agent configs
 * @param {string} topic - Topic under discussion
 * @param {Function} llmCall - Async `(model, systemPrompt, messages, options)` LLM function
 * @param {Object} trace - Interaction trace whose `metrics` are updated in place
 * @returns {Promise<{externalMessage: string, rawResponse: *, internalDeliberation: Array<{role: string, content: string}>, strategy: *, suggestsEnding: boolean}>}
 */
async function runTutorTurn(learnerId, sessionId, learnerMessage, history, tutorProfileName, topic, llmCall, trace) {
  // Get tutor memory for this learner
  const tutorMemory = tutorWritingPad.buildNarrativeSummary(learnerId, sessionId);

  // Build conversation context from the six most recent history entries
  const conversationContext = history
    .slice(-6)
    .map(m => `${m.role.toUpperCase()}: ${m.content}`)
    .join('\n\n');

  // Get tutor configuration from profile.
  // NOTE(review): the profile object itself is not used below; the lookup is
  // kept in case it validates the profile name — confirm and remove if not.
  tutorConfig.getActiveProfile(tutorProfileName);
  const egoConfig = tutorConfig.getAgentConfig('ego', tutorProfileName);
  const superegoConfig = tutorConfig.getAgentConfig('superego', tutorProfileName);

  // Tutor internal deliberation
  const internalDeliberation = [];

  // ===== T.EGO: Draft initial response =====
  const egoPrompt = `${egoConfig?.prompt || 'You are a thoughtful AI tutor.'}

Your accumulated knowledge about this learner:
${tutorMemory || 'This is a new learner - no prior history.'}

Topic: ${topic}

Recent conversation:
${conversationContext}

The learner just said:
"${learnerMessage}"

Draft your INITIAL response as a tutor. Consider:
1. What is this learner's current state? (confused, engaged, frustrated, etc.)
2. What strategy would work best? (scaffolding, questioning, direct explanation, validation)
3. How can you advance their understanding while respecting their current position?

Be warm but intellectually challenging. Don't be condescending. Build on their words.

Provide ONLY your draft response text (it will be reviewed by your pedagogical critic).`;

  const tutorModel = egoConfig?.model || tutorConfig.getProviderConfig('openrouter')?.default_model;

  const egoResponse = await llmCall(tutorModel, egoPrompt, [{ role: 'user', content: learnerMessage }], {
    // `??` rather than `||`: a configured temperature of 0 must be honoured.
    temperature: egoConfig?.hyperparameters?.temperature ?? 0.6,
    maxTokens: egoConfig?.hyperparameters?.max_tokens || 800,
  });

  trace.metrics.tutorInputTokens += egoResponse.usage?.inputTokens || 0;
  trace.metrics.tutorOutputTokens += egoResponse.usage?.outputTokens || 0;

  const egoDraft = egoResponse.content || '';
  internalDeliberation.push({
    role: 'ego',
    content: egoDraft,
  });

  // ===== T.SUPEREGO: Critique and refine =====
  const superegoPrompt = `${superegoConfig?.prompt || 'You are a pedagogical critic reviewing tutor responses.'}

Context about the learner:
${tutorMemory || 'New learner - no prior history.'}

Topic: ${topic}

The learner said:
"${learnerMessage}"

The tutor's DRAFT response:
"${egoDraft}"

CRITIQUE this draft. Consider:
1. Pedagogical soundness: Does it advance learning or just provide answers?
2. Emotional attunement: Does it respect the learner's current state?
3. Socratic method: Does it ask generative questions or just lecture?
4. ZPD awareness: Is the scaffolding appropriate for their level?

After your critique, provide an IMPROVED version if needed. Format:

CRITIQUE: [your analysis]
IMPROVED: [refined response, or "APPROVED" if draft is good]`;

  const superegoModel = superegoConfig?.model || tutorModel;

  const superegoResponse = await llmCall(superegoModel, superegoPrompt, [{ role: 'user', content: egoDraft }], {
    temperature: superegoConfig?.hyperparameters?.temperature ?? 0.4,
    maxTokens: superegoConfig?.hyperparameters?.max_tokens || 1000,
  });

  trace.metrics.tutorInputTokens += superegoResponse.usage?.inputTokens || 0;
  trace.metrics.tutorOutputTokens += superegoResponse.usage?.outputTokens || 0;

  const superegoContent = superegoResponse.content || '';
  internalDeliberation.push({
    role: 'superego',
    content: superegoContent,
  });

  // Parse superego response for improved version
  let externalMessage = egoDraft;
  const improvedMatch = superegoContent.match(/IMPROVED:\s*([\s\S]*?)(?:$)/i);
  if (improvedMatch && improvedMatch[1]) {
    const improved = improvedMatch[1].trim();
    // Treat any verdict that starts with APPROVED as approval, even when it is
    // quoted, emphasised, or followed by commentary (e.g. `"APPROVED" - the
    // draft is fine as written`). An exact-match test would let such a verdict
    // through as the tutor's message once it exceeds 20 characters.
    const isApproval = /^[\s"'*_\[(]*APPROVED\b/i.test(improved);
    if (!isApproval && improved.length > 20) {
      externalMessage = improved;
    }
  }

  // Log if response is empty (helps debug API issues)
  if (!externalMessage || externalMessage.trim() === '') {
    console.warn(`[TutorTurn] Empty response from model ${tutorModel}. Raw ego draft:`, egoDraft);
  }

  // Detect tutor's implicit strategy (run on the text before JSON extraction)
  const strategy = detectTutorStrategy(externalMessage || '');

  // Extract message from JSON if tutor returned structured response
  externalMessage = extractTutorMessage(externalMessage);

  // Fallback for empty responses - generate a brief acknowledgment
  if (!externalMessage || externalMessage.trim() === '') {
    console.warn('[TutorTurn] Empty message after extraction, using fallback');
    externalMessage = "I see what you're saying. Let me think about that for a moment. Could you tell me more about what's confusing you?";
  }

  const loweredMessage = externalMessage.toLowerCase();

  return {
    externalMessage,
    rawResponse: egoResponse.content, // Keep raw for debugging
    internalDeliberation,
    strategy,
    suggestsEnding: loweredMessage.includes('good place to pause') ||
      loweredMessage.includes('think about this'),
  };
}
610
+
611
/**
 * Extract the tutor's message text from a raw model response.
 *
 * The response may be plain text, a JSON object with a `message` field, or a
 * JSON array of suggestions whose first element has a `message` field. A
 * `message` is used only when it is a non-empty string; a structured or numeric
 * `message` is ignored, so callers that go on to `.trim()` or `.toLowerCase()`
 * the result always receive text. In every other case the original content is
 * returned unchanged.
 *
 * @param {string} content - Raw response text
 * @returns {string} The extracted message, the original content, or '' when
 *   `content` is empty/missing
 */
function extractTutorMessage(content) {
  if (!content) return '';

  try {
    const trimmed = content.trim();
    if (trimmed.startsWith('[') || trimmed.startsWith('{')) {
      const parsed = JSON.parse(trimmed);
      // For the array (suggestion list) form, take the first suggestion.
      const candidate = Array.isArray(parsed) ? parsed[0] : parsed;
      const message = candidate?.message;
      if (typeof message === 'string' && message) {
        return message;
      }
    }
  } catch {
    // Deliberately ignored: text that merely looks like JSON (or is not a
    // string at all) is passed through unchanged below.
  }

  // Return content as-is if no usable JSON message was found
  return content;
}
644
+
645
+ // ============================================================================
646
+ // Writing Pad Updates
647
+ // ============================================================================
648
+
649
/**
 * Record the effects of one learner turn in the learner writing pad.
 *
 * Always updates the conscious layer and records lesson access for `topic`.
 * Additionally records a breakthrough when the understanding level is
 * `'transforming'` or the learner's message contains "oh, i see" / "wait, so",
 * and records a trauma when the emotional state is `'frustrated'` or the
 * message contains "don't understand" (all matches case-insensitive).
 *
 * A missing learner or tutor message is treated as empty text rather than
 * raising a TypeError.
 *
 * @param {string} learnerId - Learner identifier
 * @param {string} sessionId - Session identifier
 * @param {Object} learnerResponse - Learner turn result (`externalMessage`,
 *   `emotionalState`, `understandingLevel`)
 * @param {Object} tutorResponse - Tutor turn result (`externalMessage`, `strategy`)
 * @param {string} topic - Topic under discussion
 * @returns {Promise<void>}
 */
async function updateLearnerWritingPad(learnerId, sessionId, learnerResponse, tutorResponse, topic) {
  // Lower-case once; '' stands in for a missing message.
  const learnerText = (learnerResponse.externalMessage || '').toLowerCase();

  // Update conscious layer
  learnerWritingPad.updateConsciousLayer(learnerId, sessionId, {
    currentTopic: topic,
    currentUnderstanding: learnerResponse.understandingLevel,
    emotionalState: learnerResponse.emotionalState,
  });

  // Check for breakthrough/trauma signals
  const showsBreakthrough = learnerResponse.understandingLevel === 'transforming' ||
    learnerText.includes('oh, i see') ||
    learnerText.includes('wait, so');
  if (showsBreakthrough) {
    learnerWritingPad.recordBreakthrough(learnerId, {
      momentDescription: 'Understanding shift detected',
      concept: topic,
      impactScore: 0.6,
      context: (tutorResponse.externalMessage || '').slice(0, 100),
    });
  }

  const showsFrustration = learnerResponse.emotionalState === 'frustrated' ||
    learnerText.includes("don't understand");
  if (showsFrustration) {
    learnerWritingPad.recordTrauma(learnerId, {
      momentDescription: 'Frustration with comprehension',
      concept: topic,
      impactScore: 0.4,
      trigger: tutorResponse.strategy || 'unknown',
    });
  }

  // Record lesson access
  learnerWritingPad.recordLesson(learnerId, topic, {
    currentUnderstanding: learnerResponse.understandingLevel,
  });
}
687
+
688
/**
 * Record the effects of one tutor turn in the tutor writing pad.
 *
 * The conscious state is always refreshed with the strategy just used and the
 * learner's reported emotional state (`'unknown'` when absent). When a strategy
 * was detected, the intervention is also logged with the first 200 characters
 * of the tutor's message and the first 100 characters of the learner's.
 *
 * @param {string} learnerId - Learner identifier
 * @param {string} sessionId - Session identifier
 * @param {Object} tutorResponse - Tutor turn result (`strategy`, `externalMessage`)
 * @param {Object} learnerMessage - Learner turn the tutor was responding to
 *   (`emotionalState`, `externalMessage`)
 * @returns {Promise<void>}
 */
async function updateTutorWritingPad(learnerId, sessionId, tutorResponse, learnerMessage) {
  const consciousState = {
    currentStrategy: tutorResponse.strategy,
    learnerPerceivedState: learnerMessage.emotionalState || 'unknown',
    immediateGoal: 'advance understanding',
  };
  tutorWritingPad.updateConsciousState(learnerId, sessionId, consciousState);

  // Nothing further to log when no strategy was detected.
  if (!tutorResponse.strategy) {
    return;
  }

  // Only usage is recorded at this point; success or failure is not assessed here.
  const intervention = {
    interventionType: tutorResponse.strategy,
    interventionDescription: tutorResponse.externalMessage.slice(0, 200),
    context: learnerMessage.externalMessage?.slice(0, 100),
  };
  tutorWritingPad.recordIntervention(learnerId, sessionId, intervention);
}
710
+
711
+ // ============================================================================
712
+ // Detection Helpers
713
+ // ============================================================================
714
+
715
/**
 * Detect emotional state from internal deliberation.
 *
 * All entry contents are lower-cased and joined into one text, which is then
 * scanned for keyword fragments. Checks run in a fixed priority order and the
 * first match wins: frustrated, engaged, disengaged, satisfied, confused.
 *
 * A missing (non-array) deliberation and entries without a string `content`
 * are treated as empty text instead of throwing.
 *
 * @param {Array<{content?: string}>|null|undefined} deliberation - Deliberation entries.
 * @returns {'frustrated'|'engaged'|'disengaged'|'satisfied'|'confused'|'neutral'}
 *   The detected state, or 'neutral' when nothing matches.
 */
function detectEmotionalState(deliberation) {
  const entries = Array.isArray(deliberation) ? deliberation : [];
  const combinedText = entries
    .map((entry) => (typeof entry?.content === 'string' ? entry.content.toLowerCase() : ''))
    .join(' ');
  const mentions = (fragment) => combinedText.includes(fragment);

  // `&&` binds tighter than `||`: frustration is either named outright, or
  // implied by confusion together with wanting to give up.
  if (mentions('frustrat') || (mentions('confus') && mentions('give up'))) {
    return 'frustrated';
  }
  if (mentions('excit') || mentions('interest') || mentions('curious')) {
    return 'engaged';
  }
  if (mentions('bored') || mentions("don't care") || mentions('whatever')) {
    return 'disengaged';
  }
  if (mentions('understand') && mentions('now')) {
    return 'satisfied';
  }
  if (mentions('confus') || mentions("don't get")) {
    return 'confused';
  }
  return 'neutral';
}
738
+
739
/**
 * Detect understanding level from internal deliberation.
 *
 * All entry contents are lower-cased and joined into one text, which is then
 * scanned for key phrases. Checks run in a fixed order and the first match
 * wins: none, partial, solid, transforming. Because 'solid' is tested before
 * 'transforming', a text containing both "i see" and "wait, so" is 'solid'.
 *
 * A missing (non-array) deliberation and entries without a string `content`
 * are treated as empty text instead of throwing.
 *
 * @param {Array<{content?: string}>|null|undefined} deliberation - Deliberation entries.
 * @returns {'none'|'partial'|'solid'|'transforming'|'developing'}
 *   The detected level, or 'developing' when nothing matches.
 */
function detectUnderstandingLevel(deliberation) {
  const entries = Array.isArray(deliberation) ? deliberation : [];
  const combinedText = entries
    .map((entry) => (typeof entry?.content === 'string' ? entry.content.toLowerCase() : ''))
    .join(' ');
  const mentions = (phrase) => combinedText.includes(phrase);

  if (mentions('completely lost') || mentions('no idea')) {
    return 'none';
  }
  if (mentions('starting to') || mentions('maybe') || mentions('partially')) {
    return 'partial';
  }
  if (mentions('i get it') || mentions('makes sense') || mentions('i see')) {
    return 'solid';
  }
  if (mentions('wait, so') || mentions('that means') || mentions('restructur')) {
    return 'transforming';
  }
  return 'developing';
}
759
+
760
/**
 * Label a tutor response with the teaching strategy its wording suggests.
 *
 * The response is lower-cased and matched against phrase lists in a fixed
 * priority order; the first strategy with a hit is returned. Socratic
 * questioning additionally requires a question mark somewhere in the text.
 *
 * @param {string} response - Tutor response text.
 * @returns {string} One of 'socratic_questioning', 'concrete_examples',
 *   'scaffolding', 'validation', 'gentle_correction',
 *   'intellectual_challenge', or the fallback 'direct_explanation'.
 */
function detectTutorStrategy(response) {
  const text = response.toLowerCase();
  const containsAny = (phrases) => phrases.some((phrase) => text.includes(phrase));

  // Ordered rules: earlier entries take precedence over later ones.
  const rules = [
    {
      strategy: 'socratic_questioning',
      matches: () => text.includes('?') && containsAny(['what do you think', 'how might']),
    },
    {
      strategy: 'concrete_examples',
      matches: () => containsAny(['for example', 'imagine', 'like when']),
    },
    {
      strategy: 'scaffolding',
      matches: () => containsAny(['let me break', 'first', 'step by step']),
    },
    {
      strategy: 'validation',
      matches: () => containsAny(["you're right", 'good observation', 'exactly']),
    },
    {
      strategy: 'gentle_correction',
      matches: () => containsAny(['actually', 'important distinction', 'however']),
    },
    {
      strategy: 'intellectual_challenge',
      matches: () => containsAny(['challenge', 'consider', 'what if']),
    },
  ];

  const hit = rules.find((rule) => rule.matches());
  return hit ? hit.strategy : 'direct_explanation';
}
786
+
787
/**
 * Derive the outcome flags for a single turn from the learner's response.
 *
 * A 'transforming' understanding level yields BREAKTHROUGH. The emotional
 * state then contributes at most one more flag: PRODUCTIVE_STRUGGLE (confused
 * while understanding is 'developing'), FRUSTRATION, or DISENGAGEMENT.
 *
 * @param {object} learnerResponse - Reads `understandingLevel` and `emotionalState`.
 * @param {object} tutorResponse - Accepted but not read by this function.
 * @returns {Array} Matching INTERACTION_OUTCOMES values; empty when none apply.
 */
function detectTurnOutcomes(learnerResponse, tutorResponse) {
  const { understandingLevel: level, emotionalState: mood } = learnerResponse;
  const detected = [];

  if (level === 'transforming') {
    detected.push(INTERACTION_OUTCOMES.BREAKTHROUGH);
  }

  // The emotional states are mutually exclusive, so one switch covers them.
  switch (mood) {
    case 'confused':
      if (level === 'developing') {
        detected.push(INTERACTION_OUTCOMES.PRODUCTIVE_STRUGGLE);
      }
      break;
    case 'frustrated':
      detected.push(INTERACTION_OUTCOMES.FRUSTRATION);
      break;
    case 'disengaged':
      detected.push(INTERACTION_OUTCOMES.DISENGAGEMENT);
      break;
    default:
      break;
  }

  return detected;
}
808
+
809
/**
 * Condense an interaction trace into a summary record.
 *
 * Reports the turn count, the de-duplicated outcomes with boolean flags for
 * breakthrough / frustration / productive struggle, the emotional state and
 * understanding level read from the last turn ('unknown' when absent), and
 * the writing-pad deltas for learner and tutor.
 *
 * @param {object} trace - Reads `outcomes`, `turns`, and
 *   `writingPadSnapshots.{learner,tutor}.{before,after}`.
 * @returns {object} The summary described above.
 */
function generateInteractionSummary(trace) {
  const outcomeSet = new Set(trace.outcomes);
  const uniqueOutcomes = [...outcomeSet];

  const { turns } = trace;
  const finalTurn = turns[turns.length - 1];

  const summary = {
    turnCount: turns.length,
    uniqueOutcomes,
    hadBreakthrough: outcomeSet.has(INTERACTION_OUTCOMES.BREAKTHROUGH),
    hadFrustration: outcomeSet.has(INTERACTION_OUTCOMES.FRUSTRATION),
    hadProductiveStruggle: outcomeSet.has(INTERACTION_OUTCOMES.PRODUCTIVE_STRUGGLE),
    learnerFinalState: finalTurn?.emotionalState || 'unknown',
    learnerFinalUnderstanding: finalTurn?.understandingLevel || 'unknown',
  };

  const learnerPad = trace.writingPadSnapshots.learner;
  const learnerDelta = calculateMemoryDelta(learnerPad.before, learnerPad.after);
  const tutorPad = trace.writingPadSnapshots.tutor;
  const tutorDelta = calculateMemoryDelta(tutorPad.before, tutorPad.after);

  summary.memoryChanges = { learner: learnerDelta, tutor: tutorDelta };
  return summary;
}
835
+
836
/**
 * Compare two writing-pad snapshots and report how the tracked lists grew.
 *
 * Each figure is the `after` list length minus the `before` list length, with
 * a missing layer or list counted as zero, so a figure can be negative.
 *
 * @param {object|null|undefined} before - Snapshot taken before the interaction.
 * @param {object|null|undefined} after - Snapshot taken after the interaction.
 * @returns {{noData: true}|{newLessons: number, newBreakthroughs: number, newTraumas: number}}
 *   `{ noData: true }` when either snapshot is falsy, otherwise the deltas.
 */
function calculateMemoryDelta(before, after) {
  if (!(before && after)) {
    return { noData: true };
  }

  // Length of snapshot[layer][list], or 0 when any link is missing.
  const sizeOf = (snapshot, layer, list) => snapshot[layer]?.[list]?.length || 0;
  const growth = (layer, list) => sizeOf(after, layer, list) - sizeOf(before, layer, list);

  const newLessons = growth('preconscious', 'lessons');
  const newBreakthroughs = growth('unconscious', 'breakthroughs');
  const newTraumas = growth('unconscious', 'unresolvedTraumas');

  return { newLessons, newBreakthroughs, newTraumas };
}
849
+
850
+ // ============================================================================
851
+ // Exports
852
+ // ============================================================================
853
+
854
// Default export: the interaction runner together with the outcome constants.
export default { runInteraction, INTERACTION_OUTCOMES };