npm - @machinespirits/eval - Versions diffs - 0.2.0 → 0.3.0 - Mend

@machinespirits/eval 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74)

package/README.md +91 -9
package/config/eval-settings.yaml +3 -3
package/config/paper-manifest.json +486 -0
package/config/providers.yaml +9 -6
package/config/tutor-agents.yaml +2261 -0
package/content/README.md +23 -0
package/content/courses/479/course.md +53 -0
package/content/courses/479/lecture-1.md +361 -0
package/content/courses/479/lecture-2.md +360 -0
package/content/courses/479/lecture-3.md +655 -0
package/content/courses/479/lecture-4.md +530 -0
package/content/courses/479/lecture-5.md +326 -0
package/content/courses/479/lecture-6.md +346 -0
package/content/courses/479/lecture-7.md +326 -0
package/content/courses/479/lecture-8.md +273 -0
package/content/courses/479/roadmap-slides.md +656 -0
package/content/manifest.yaml +8 -0
package/docs/research/build.sh +44 -20
package/docs/research/figures/figure10.png +0 -0
package/docs/research/figures/figure11.png +0 -0
package/docs/research/figures/figure3.png +0 -0
package/docs/research/figures/figure4.png +0 -0
package/docs/research/figures/figure5.png +0 -0
package/docs/research/figures/figure6.png +0 -0
package/docs/research/figures/figure7.png +0 -0
package/docs/research/figures/figure8.png +0 -0
package/docs/research/figures/figure9.png +0 -0
package/docs/research/header.tex +23 -2
package/docs/research/paper-full.md +941 -285
package/docs/research/paper-short.md +216 -585
package/docs/research/references.bib +132 -0
package/docs/research/slides-header.tex +188 -0
package/docs/research/slides-pptx.md +363 -0
package/docs/research/slides.md +531 -0
package/docs/research/style-reference-pptx.py +199 -0
package/package.json +6 -5
package/scripts/analyze-eval-results.js +69 -17
package/scripts/analyze-mechanism-traces.js +763 -0
package/scripts/analyze-modulation-learning.js +498 -0
package/scripts/analyze-prosthesis.js +144 -0
package/scripts/analyze-run.js +264 -79
package/scripts/assess-transcripts.js +853 -0
package/scripts/browse-transcripts.js +854 -0
package/scripts/check-parse-failures.js +73 -0
package/scripts/code-dialectical-modulation.js +1320 -0
package/scripts/download-data.sh +55 -0
package/scripts/eval-cli.js +106 -18
package/scripts/generate-paper-figures.js +663 -0
package/scripts/generate-paper-figures.py +577 -76
package/scripts/generate-paper-tables.js +299 -0
package/scripts/qualitative-analysis-ai.js +3 -3
package/scripts/render-sequence-diagram.js +694 -0
package/scripts/test-latency.js +210 -0
package/scripts/test-rate-limit.js +95 -0
package/scripts/test-token-budget.js +332 -0
package/scripts/validate-paper-manifest.js +670 -0
package/services/__tests__/evalConfigLoader.test.js +2 -2
package/services/__tests__/learnerRubricEvaluator.test.js +361 -0
package/services/__tests__/learnerTutorInteractionEngine.test.js +326 -0
package/services/evaluationRunner.js +975 -98
package/services/evaluationStore.js +12 -4
package/services/learnerTutorInteractionEngine.js +27 -2
package/services/mockProvider.js +133 -0
package/services/promptRewriter.js +1471 -5
package/services/rubricEvaluator.js +55 -2
package/services/transcriptFormatter.js +675 -0
package/docs/EVALUATION-VARIABLES.md +0 -589
package/docs/REPLICATION-PLAN.md +0 -577
package/scripts/analyze-run.mjs +0 -282
package/scripts/compare-runs.js +0 -44
package/scripts/compare-suggestions.js +0 -80
package/scripts/dig-into-run.js +0 -158
package/scripts/show-failed-suggestions.js +0 -64
/package/scripts/{check-run.mjs → check-run.js} +0 -0

package/services/__tests__/learnerRubricEvaluator.test.js ADDED Viewed

@@ -0,0 +1,361 @@
+/**
+ * Tests for learnerRubricEvaluator — learner-side scoring.
+ *
+ * Uses node:test (built-in, no dependencies required).
+ * Run: node --test services/__tests__/learnerRubricEvaluator.test.js
+ */
+import { describe, it } from 'node:test';
+import assert from 'node:assert/strict';
+import {
+  loadLearnerRubric,
+  getLearnerDimensions,
+  calculateLearnerOverallScore,
+  buildLearnerEvaluationPrompt,
+} from '../learnerRubricEvaluator.js';
+// ============================================================================
+// loadLearnerRubric — parsing, caching, and structure of the learner rubric
+// ============================================================================
+describe('loadLearnerRubric', () => {
+  it('loads and parses the learner rubric YAML', () => {
+    const rubric = loadLearnerRubric({ forceReload: true }); // forceReload presumably bypasses the cache — confirm in the evaluator
+    assert.ok(rubric, 'should return parsed rubric');
+    assert.ok(rubric.dimensions, 'should have dimensions');
+    assert.ok(rubric.name, 'should have name');
+  });
+  it('returns cached result on second call', () => {
+    const first = loadLearnerRubric({ forceReload: true });
+    const second = loadLearnerRubric(); // no forceReload: expected to be served from the cache
+    assert.strictEqual(first, second, 'should return same cached reference'); // reference identity, not deep equality
+  });
+  it('contains all 6 expected dimensions', () => {
+    const rubric = loadLearnerRubric({ forceReload: true });
+    const keys = Object.keys(rubric.dimensions);
+    assert.ok(keys.includes('learner_authenticity'));
+    assert.ok(keys.includes('question_quality'));
+    assert.ok(keys.includes('conceptual_engagement'));
+    assert.ok(keys.includes('revision_signals'));
+    assert.ok(keys.includes('deliberation_depth'));
+    assert.ok(keys.includes('persona_consistency'));
+    assert.strictEqual(keys.length, 6); // exact count rules out unexpected extra dimensions
+  });
+  it('each dimension has name, weight, description, and criteria', () => {
+    const rubric = loadLearnerRubric({ forceReload: true });
+    for (const [key, dim] of Object.entries(rubric.dimensions)) {
+      assert.ok(dim.name, `${key} should have name`);
+      assert.ok(typeof dim.weight === 'number', `${key} should have numeric weight`);
+      assert.ok(dim.description, `${key} should have description`);
+      assert.ok(dim.criteria, `${key} should have criteria`);
+    }
+  });
+  it('weights sum to 1.0', () => {
+    const rubric = loadLearnerRubric({ forceReload: true });
+    const totalWeight = Object.values(rubric.dimensions)
+      .reduce((sum, dim) => sum + dim.weight, 0);
+    assert.ok(
+      Math.abs(totalWeight - 1.0) < 0.001, // tolerance absorbs floating-point rounding in the sum
+      `weights should sum to 1.0, got ${totalWeight}`
+    );
+  });
+});
+// ============================================================================
+// getLearnerDimensions — dimension set for multi-agent vs single-agent learners
+// ============================================================================
+describe('getLearnerDimensions', () => {
+  it('returns all 6 dimensions for multi-agent learners', () => {
+    const dims = getLearnerDimensions({ isMultiAgent: true });
+    assert.strictEqual(Object.keys(dims).length, 6);
+    assert.ok('deliberation_depth' in dims);
+  });
+  it('returns 5 dimensions for single-agent learners (excludes deliberation_depth)', () => {
+    const dims = getLearnerDimensions({ isMultiAgent: false });
+    assert.strictEqual(Object.keys(dims).length, 5);
+    assert.ok(!('deliberation_depth' in dims));
+  });
+  it('defaults to single-agent when no options provided', () => {
+    const dims = getLearnerDimensions(); // called with no arguments at all
+    assert.strictEqual(Object.keys(dims).length, 5);
+    assert.ok(!('deliberation_depth' in dims));
+  });
+  it('does not mutate the cached rubric', () => {
+    // Request single-agent dims first (expected to drop deliberation_depth from a copy, not from the cache)
+    getLearnerDimensions({ isMultiAgent: false });
+    // A later multi-agent request must still see all 6; a missing key here would mean the cache was mutated
+    const multiDims = getLearnerDimensions({ isMultiAgent: true });
+    assert.strictEqual(Object.keys(multiDims).length, 6);
+    assert.ok('deliberation_depth' in multiDims);
+  });
+});
+// ============================================================================
+// calculateLearnerOverallScore — weighted 1-5 dimension scores mapped to 0-100
+// ============================================================================
+describe('calculateLearnerOverallScore', () => {
+  // Rubric weights (0.20, 0.20, 0.20, 0.15, 0.15, 0.10) may not sum to exactly
+  // 1.0 in IEEE 754, so score results are compared within an absolute tolerance of 0.01.
+  const approxEqual = (actual, expected, msg) => {
+    assert.ok(
+      Math.abs(actual - expected) < 0.01,
+      `${msg || 'approxEqual'}: expected ~${expected}, got ${actual}`
+    );
+  };
+  it('returns ~100 when all scores are 5 (multi-agent)', () => {
+    const scores = {
+      learner_authenticity: { score: 5, reasoning: 'test' },
+      question_quality: { score: 5, reasoning: 'test' },
+      conceptual_engagement: { score: 5, reasoning: 'test' },
+      revision_signals: { score: 5, reasoning: 'test' },
+      deliberation_depth: { score: 5, reasoning: 'test' },
+      persona_consistency: { score: 5, reasoning: 'test' },
+    };
+    const result = calculateLearnerOverallScore(scores, true); // second argument true selects multi-agent scoring
+    approxEqual(result, 100);
+  });
+  it('returns 0 when all scores are 1 (multi-agent)', () => {
+    const scores = {
+      learner_authenticity: { score: 1, reasoning: 'test' },
+      question_quality: { score: 1, reasoning: 'test' },
+      conceptual_engagement: { score: 1, reasoning: 'test' },
+      revision_signals: { score: 1, reasoning: 'test' },
+      deliberation_depth: { score: 1, reasoning: 'test' },
+      persona_consistency: { score: 1, reasoning: 'test' },
+    };
+    const result = calculateLearnerOverallScore(scores, true);
+    approxEqual(result, 0);
+  });
+  it('returns ~50 when all scores are 3 (midpoint)', () => {
+    const scores = {
+      learner_authenticity: { score: 3, reasoning: 'test' },
+      question_quality: { score: 3, reasoning: 'test' },
+      conceptual_engagement: { score: 3, reasoning: 'test' },
+      revision_signals: { score: 3, reasoning: 'test' },
+      deliberation_depth: { score: 3, reasoning: 'test' },
+      persona_consistency: { score: 3, reasoning: 'test' },
+    };
+    const result = calculateLearnerOverallScore(scores, true);
+    approxEqual(result, 50);
+  });
+  it('returns ~100 when all scores are 5 (single-agent, no deliberation_depth)', () => {
+    const scores = {
+      learner_authenticity: { score: 5, reasoning: 'test' },
+      question_quality: { score: 5, reasoning: 'test' },
+      conceptual_engagement: { score: 5, reasoning: 'test' },
+      revision_signals: { score: 5, reasoning: 'test' },
+      persona_consistency: { score: 5, reasoning: 'test' },
+    };
+    const result = calculateLearnerOverallScore(scores, false); // false selects single-agent scoring
+    approxEqual(result, 100);
+  });
+  it('ignores deliberation_depth for single-agent even if provided', () => {
+    const scores = {
+      learner_authenticity: { score: 5, reasoning: 'test' },
+      question_quality: { score: 5, reasoning: 'test' },
+      conceptual_engagement: { score: 5, reasoning: 'test' },
+      revision_signals: { score: 5, reasoning: 'test' },
+      persona_consistency: { score: 5, reasoning: 'test' },
+      deliberation_depth: { score: 1, reasoning: 'should be ignored' },
+    };
+    // Single-agent: the deliberation_depth score of 1 must not pull the result down, so all 5s → ~100
+    const result = calculateLearnerOverallScore(scores, false);
+    approxEqual(result, 100);
+  });
+  it('handles plain number scores (not {score, reasoning} objects)', () => {
+    const scores = {
+      learner_authenticity: 4,
+      question_quality: 4,
+      conceptual_engagement: 4,
+      revision_signals: 4,
+      persona_consistency: 4,
+    };
+    const result = calculateLearnerOverallScore(scores, false);
+    approxEqual(result, 75); // (4-1)/4 * 100 = 75
+  });
+  it('returns 0 when no scores provided', () => {
+    const result = calculateLearnerOverallScore({}, false);
+    assert.strictEqual(result, 0); // exact 0 rather than approximate: there is nothing to average
+  });
+  it('skips invalid scores (out of 1-5 range)', () => {
+    const scores = {
+      learner_authenticity: { score: 0, reasoning: 'invalid' },
+      question_quality: { score: 6, reasoning: 'invalid' },
+      conceptual_engagement: { score: 3, reasoning: 'valid' },
+      revision_signals: { score: 3, reasoning: 'valid' },
+      persona_consistency: { score: 3, reasoning: 'valid' },
+    };
+    const result = calculateLearnerOverallScore(scores, false);
+    // Only the three valid scores (all 3s) count → ~50; this implies normalization over the counted weights only
+    approxEqual(result, 50);
+  });
+  it('correctly applies weights for mixed scores', () => {
+    // Multi-agent: weights are 0.20, 0.20, 0.20, 0.15, 0.15, 0.10
+    const scores = {
+      learner_authenticity: { score: 5, reasoning: '' },    // 0.20
+      question_quality: { score: 5, reasoning: '' },         // 0.20
+      conceptual_engagement: { score: 5, reasoning: '' },    // 0.20
+      revision_signals: { score: 1, reasoning: '' },         // 0.15
+      deliberation_depth: { score: 1, reasoning: '' },       // 0.15
+      persona_consistency: { score: 1, reasoning: '' },      // 0.10
+    };
+    // weighted avg = (5*0.20 + 5*0.20 + 5*0.20 + 1*0.15 + 1*0.15 + 1*0.10) / 1.0
+    //             = 3.0 + 0.4 = 3.4
+    // overall = (3.4 - 1) / 4 * 100 = 60
+    const result = calculateLearnerOverallScore(scores, true);
+    approxEqual(result, 60);
+  });
+});
+// ============================================================================
+// buildLearnerEvaluationPrompt — evaluation prompt assembly for one learner turn
+// ============================================================================
+describe('buildLearnerEvaluationPrompt', () => {
+  const sampleTurns = [
+    {
+      turnNumber: 0,
+      phase: 'learner',
+      externalMessage: 'I do not understand dialectics at all.',
+    },
+    {
+      turnNumber: 1,
+      phase: 'tutor',
+      externalMessage: 'Let me explain — dialectics is about transformation through contradiction.',
+    },
+    {
+      turnNumber: 1,
+      phase: 'learner',
+      externalMessage: 'Oh wait, so it is not just about arguing?',
+      internalDeliberation: [
+        { role: 'ego_initial', content: 'This is confusing but interesting.' },
+        { role: 'superego', content: 'Push deeper — what exactly changed in your understanding?' },
+        { role: 'ego_revision', content: 'I think I was wrong about dialectics being just arguments.' },
+      ],
+    },
+  ];
+  it('builds a prompt string containing key sections', () => {
+    const prompt = buildLearnerEvaluationPrompt({
+      turns: sampleTurns,
+      targetTurnIndex: 2, // array index into turns (the second learner entry), not its turnNumber
+      personaId: 'productive_struggler',
+      personaDescription: 'A student who struggles productively',
+      learnerArchitecture: 'multi_agent',
+      scenarioName: 'Misconception Correction',
+      topic: 'Hegelian dialectics',
+    });
+    assert.ok(typeof prompt === 'string');
+    assert.ok(prompt.includes('EVALUATION RUBRIC'));
+    assert.ok(prompt.includes('LEARNER CONTEXT'));
+    assert.ok(prompt.includes('DIALOGUE HISTORY'));
+    assert.ok(prompt.includes('LEARNER TURN TO EVALUATE'));
+    assert.ok(prompt.includes('productive_struggler'));
+    assert.ok(prompt.includes('Misconception Correction'));
+    assert.ok(prompt.includes('Hegelian dialectics'));
+  });
+  it('includes all 6 dimension keys for multi-agent', () => {
+    const prompt = buildLearnerEvaluationPrompt({
+      turns: sampleTurns,
+      targetTurnIndex: 2,
+      learnerArchitecture: 'multi_agent',
+    });
+    assert.ok(prompt.includes('learner_authenticity'));
+    assert.ok(prompt.includes('question_quality'));
+    assert.ok(prompt.includes('conceptual_engagement'));
+    assert.ok(prompt.includes('revision_signals'));
+    assert.ok(prompt.includes('deliberation_depth'));
+    assert.ok(prompt.includes('persona_consistency'));
+  });
+  it('excludes deliberation_depth for unified learner', () => {
+    const prompt = buildLearnerEvaluationPrompt({
+      turns: sampleTurns,
+      targetTurnIndex: 2,
+      learnerArchitecture: 'unified',
+    });
+    // The key itself appears inside the OMIT instruction, so absence cannot be asserted with includes(); check the instruction instead
+    assert.ok(prompt.includes('learner_authenticity'));
+    assert.ok(prompt.includes('OMIT the deliberation_depth dimension'));
+  });
+  it('includes internal deliberation section for multi-agent learners', () => {
+    const prompt = buildLearnerEvaluationPrompt({
+      turns: sampleTurns,
+      targetTurnIndex: 2,
+      learnerArchitecture: 'multi_agent',
+    });
+    assert.ok(prompt.includes('Internal deliberation'));
+    assert.ok(prompt.includes('Ego (initial reaction)'));
+    assert.ok(prompt.includes('Superego (critique)'));
+    assert.ok(prompt.includes('Ego (revision')); // prefix only: the rest of this label is deliberately not pinned
+  });
+  it('truncates transcript at targetTurnIndex (no future turns)', () => {
+    const extraTurns = [
+      ...sampleTurns,
+      {
+        turnNumber: 2,
+        phase: 'tutor',
+        externalMessage: 'THIS SHOULD NOT APPEAR IN PROMPT',
+      },
+    ];
+    const prompt = buildLearnerEvaluationPrompt({
+      turns: extraTurns,
+      targetTurnIndex: 2, // Evaluate the learner turn at index 2; the sentinel tutor turn sits at index 3
+      learnerArchitecture: 'unified',
+    });
+    assert.ok(!prompt.includes('THIS SHOULD NOT APPEAR IN PROMPT'));
+  });
+  it('handles missing externalMessage gracefully', () => {
+    const turns = [
+      { turnNumber: 0, phase: 'learner', externalMessage: null },
+    ];
+    const prompt = buildLearnerEvaluationPrompt({
+      turns,
+      targetTurnIndex: 0,
+      learnerArchitecture: 'unified',
+    });
+    assert.ok(prompt.includes('(no message)')); // placeholder text expected in place of the null message
+  });
+  it('recognizes psychodynamic as multi-agent', () => {
+    const prompt = buildLearnerEvaluationPrompt({
+      turns: sampleTurns,
+      targetTurnIndex: 2,
+      learnerArchitecture: 'psychodynamic',
+    });
+    assert.ok(prompt.includes('deliberation_depth'));
+    assert.ok(prompt.includes('Score ALL dimensions including deliberation_depth'));
+  });
+});

package/services/__tests__/learnerTutorInteractionEngine.test.js ADDED Viewed

@@ -0,0 +1,326 @@
+/**
+ * Tests for pure helper functions in learnerTutorInteractionEngine.
+ *
+ * Tests only the exported utility functions that have no LLM dependencies.
+ * The full runInteraction() and generateLearnerResponse() flows require
+ * LLM calls and are better tested via integration tests.
+ *
+ * Uses node:test (built-in, no dependencies required).
+ * Run: node --test services/__tests__/learnerTutorInteractionEngine.test.js
+ */
+import { describe, it } from 'node:test';
+import assert from 'node:assert/strict';
+import {
+  detectEmotionalState,
+  detectUnderstandingLevel,
+  detectTutorStrategy,
+  extractTutorMessage,
+  calculateMemoryDelta,
+  INTERACTION_OUTCOMES,
+} from '../learnerTutorInteractionEngine.js';
+// ============================================================================
+// INTERACTION_OUTCOMES — exported outcome constants (UPPER_SNAKE key → string value)
+// ============================================================================
+describe('INTERACTION_OUTCOMES', () => {
+  it('contains all expected outcome types', () => {
+    assert.strictEqual(INTERACTION_OUTCOMES.BREAKTHROUGH, 'breakthrough');
+    assert.strictEqual(INTERACTION_OUTCOMES.PRODUCTIVE_STRUGGLE, 'productive_struggle');
+    assert.strictEqual(INTERACTION_OUTCOMES.MUTUAL_RECOGNITION, 'mutual_recognition');
+    assert.strictEqual(INTERACTION_OUTCOMES.FRUSTRATION, 'frustration');
+    assert.strictEqual(INTERACTION_OUTCOMES.DISENGAGEMENT, 'disengagement');
+    assert.strictEqual(INTERACTION_OUTCOMES.SCAFFOLDING_NEEDED, 'scaffolding_needed');
+    assert.strictEqual(INTERACTION_OUTCOMES.FADING_APPROPRIATE, 'fading_appropriate');
+    assert.strictEqual(INTERACTION_OUTCOMES.TRANSFORMATION, 'transformation');
+  });
+  it('has exactly 8 outcomes', () => {
+    assert.strictEqual(Object.keys(INTERACTION_OUTCOMES).length, 8); // fails if an outcome is added or removed without updating the list above
+  });
+});
+// ============================================================================
+// detectEmotionalState — classifies deliberation text into an emotional state
+// ============================================================================
+describe('detectEmotionalState', () => {
+  it('detects frustrated state', () => {
+    const delib = [{ role: 'ego', content: 'I am so frustrated, I want to give up on this confusing topic.' }];
+    assert.strictEqual(detectEmotionalState(delib), 'frustrated'); // the text also says "confusing"; 'frustrated' is expected to win
+  });
+  it('detects engaged state from excitement', () => {
+    const delib = [{ role: 'ego', content: 'This is really exciting and interesting!' }];
+    assert.strictEqual(detectEmotionalState(delib), 'engaged');
+  });
+  it('detects engaged state from curiosity', () => {
+    const delib = [{ role: 'ego', content: 'I am curious about how this works.' }];
+    assert.strictEqual(detectEmotionalState(delib), 'engaged');
+  });
+  it('detects disengaged state', () => {
+    const delib = [{ role: 'ego', content: 'I am bored with this, whatever.' }];
+    assert.strictEqual(detectEmotionalState(delib), 'disengaged');
+  });
+  it('detects satisfied state', () => {
+    const delib = [{ role: 'ego', content: 'I understand this concept now.' }];
+    assert.strictEqual(detectEmotionalState(delib), 'satisfied');
+  });
+  it('detects confused state', () => {
+    const delib = [{ role: 'ego', content: 'I am confused by the terminology.' }];
+    assert.strictEqual(detectEmotionalState(delib), 'confused');
+  });
+  it('returns neutral when no signals found', () => {
+    const delib = [{ role: 'ego', content: 'The topic at hand is dialectics.' }];
+    assert.strictEqual(detectEmotionalState(delib), 'neutral');
+  });
+  it('combines text from multiple deliberation steps', () => {
+    const delib = [
+      { role: 'ego', content: 'Hmm let me think about this.' },
+      { role: 'superego', content: 'This is really interesting, push deeper.' },
+    ];
+    // 'interesting' appears only in the second (superego) step, so every step's content must be scanned
+    assert.strictEqual(detectEmotionalState(delib), 'engaged');
+  });
+});
+// ============================================================================
+// detectUnderstandingLevel — maps deliberation text to an understanding level
+// ============================================================================
+describe('detectUnderstandingLevel', () => {
+  it('detects none level', () => {
+    const delib = [{ role: 'ego', content: 'I am completely lost here, I have no idea what this means.' }];
+    assert.strictEqual(detectUnderstandingLevel(delib), 'none');
+  });
+  it('detects partial level', () => {
+    const delib = [{ role: 'ego', content: 'I am starting to see the pattern, maybe it works like this.' }];
+    assert.strictEqual(detectUnderstandingLevel(delib), 'partial');
+  });
+  it('detects solid level with "makes sense"', () => {
+    const delib = [{ role: 'ego', content: 'That makes sense now, I see how these ideas connect.' }];
+    assert.strictEqual(detectUnderstandingLevel(delib), 'solid');
+  });
+  it('detects solid level with "i get it"', () => {
+    const delib = [{ role: 'ego', content: 'Oh, i get it! The synthesis transforms both sides.' }];
+    assert.strictEqual(detectUnderstandingLevel(delib), 'solid');
+  });
+  it('detects transforming level', () => {
+    const delib = [{ role: 'ego', content: 'Wait, so that means the whole framework needs restructuring.' }];
+    assert.strictEqual(detectUnderstandingLevel(delib), 'transforming');
+  });
+  it('returns developing by default', () => {
+    const delib = [{ role: 'ego', content: 'I am working through the problem carefully.' }];
+    assert.strictEqual(detectUnderstandingLevel(delib), 'developing'); // no level-specific phrasing in the text, so the fallback is expected
+  });
+});
+// ============================================================================
+// detectTutorStrategy — labels a tutor message string with a teaching strategy
+// ============================================================================
+describe('detectTutorStrategy', () => {
+  it('detects socratic_questioning', () => {
+    assert.strictEqual(
+      detectTutorStrategy('What do you think would happen if we applied this differently?'),
+      'socratic_questioning'
+    );
+  });
+  it('detects socratic_questioning with "how might"', () => {
+    assert.strictEqual(
+      detectTutorStrategy('How might this concept relate to your experience?'),
+      'socratic_questioning'
+    );
+  });
+  it('detects concrete_examples', () => {
+    assert.strictEqual(
+      detectTutorStrategy('For example, imagine you are building a bridge.'),
+      'concrete_examples'
+    );
+  });
+  it('detects concrete_examples with "like when"', () => {
+    assert.strictEqual(
+      detectTutorStrategy('It is like when you first learned to ride a bicycle.'),
+      'concrete_examples'
+    );
+  });
+  it('detects scaffolding', () => {
+    assert.strictEqual(
+      detectTutorStrategy('Let me break this down. First, we look at the thesis.'),
+      'scaffolding'
+    );
+  });
+  it('detects validation', () => {
+    assert.strictEqual(
+      detectTutorStrategy("You're right, that is an important insight."),
+      'validation'
+    );
+  });
+  it('detects validation with "good observation"', () => {
+    assert.strictEqual(
+      detectTutorStrategy('Good observation! That connection is key.'),
+      'validation'
+    );
+  });
+  it('detects gentle_correction', () => {
+    assert.strictEqual(
+      detectTutorStrategy('Actually, there is an important distinction between these concepts.'),
+      'gentle_correction'
+    );
+  });
+  it('detects intellectual_challenge', () => {
+    assert.strictEqual(
+      detectTutorStrategy('Consider what would happen in the opposite case.'),
+      'intellectual_challenge'
+    );
+  });
+  it('returns direct_explanation as default', () => {
+    assert.strictEqual(
+      detectTutorStrategy('Dialectics is a philosophical framework developed by Hegel.'),
+      'direct_explanation' // plain statement with no strategy cue, so the fallback label is expected
+    );
+  });
+});
+// ============================================================================
+// extractTutorMessage — pulls the message text out of plain or JSON tutor output
+// ============================================================================
+describe('extractTutorMessage', () => {
+  it('returns plain text as-is', () => {
+    assert.strictEqual(
+      extractTutorMessage('Hello, let me help you understand this concept.'),
+      'Hello, let me help you understand this concept.'
+    );
+  });
+  it('extracts message from JSON array (tutor suggestion format)', () => {
+    const json = JSON.stringify([{ message: 'This is the tutor response.' }]);
+    assert.strictEqual(
+      extractTutorMessage(json),
+      'This is the tutor response.'
+    );
+  });
+  it('extracts message from single JSON object', () => {
+    const json = JSON.stringify({ message: 'A single suggestion.' });
+    assert.strictEqual(
+      extractTutorMessage(json),
+      'A single suggestion.'
+    );
+  });
+  it('returns empty string for null input', () => {
+    assert.strictEqual(extractTutorMessage(null), '');
+  });
+  it('returns empty string for undefined input', () => {
+    assert.strictEqual(extractTutorMessage(undefined), '');
+  });
+  it('returns empty string for empty string input', () => {
+    assert.strictEqual(extractTutorMessage(''), '');
+  });
+  it('returns original text for invalid JSON that starts with [', () => {
+    const text = '[not valid json at all'; // looks like the start of a JSON array but cannot be parsed
+    assert.strictEqual(extractTutorMessage(text), text);
+  });
+  it('returns original text for JSON array without message field', () => {
+    const json = JSON.stringify([{ text: 'no message field' }]);
+    assert.strictEqual(extractTutorMessage(json), json); // the raw JSON text is expected back unchanged
+  });
+  it('handles JSON with whitespace padding', () => {
+    const json = '  ' + JSON.stringify([{ message: 'padded' }]) + '  '; // two spaces on each side of valid JSON
+    assert.strictEqual(extractTutorMessage(json), 'padded');
+  });
+});
+// ============================================================================
+// calculateMemoryDelta — counts of new lessons/breakthroughs/traumas between snapshots
+// ============================================================================
+describe('calculateMemoryDelta', () => {
+  it('returns noData when before is null', () => {
+    const result = calculateMemoryDelta(null, { preconscious: {} });
+    assert.deepStrictEqual(result, { noData: true });
+  });
+  it('returns noData when after is null', () => {
+    const result = calculateMemoryDelta({ preconscious: {} }, null);
+    assert.deepStrictEqual(result, { noData: true });
+  });
+  it('returns noData when both are null', () => {
+    const result = calculateMemoryDelta(null, null);
+    assert.deepStrictEqual(result, { noData: true });
+  });
+  it('calculates zero delta when nothing changed', () => {
+    const state = {
+      preconscious: { lessons: ['a', 'b'] },
+      unconscious: { breakthroughs: ['x'], unresolvedTraumas: [] },
+    };
+    const result = calculateMemoryDelta(state, state); // the same object serves as both snapshots
+    assert.deepStrictEqual(result, {
+      newLessons: 0,
+      newBreakthroughs: 0,
+      newTraumas: 0,
+    });
+  });
+  it('calculates positive deltas when items added', () => {
+    const before = {
+      preconscious: { lessons: ['a'] },
+      unconscious: { breakthroughs: [], unresolvedTraumas: [] },
+    };
+    const after = {
+      preconscious: { lessons: ['a', 'b', 'c'] },
+      unconscious: { breakthroughs: ['x'], unresolvedTraumas: ['y'] },
+    };
+    const result = calculateMemoryDelta(before, after);
+    assert.deepStrictEqual(result, {
+      newLessons: 2,
+      newBreakthroughs: 1,
+      newTraumas: 1,
+    });
+  });
+  it('handles missing nested properties gracefully', () => {
+    const before = {}; // neither preconscious nor unconscious is present
+    const after = {
+      preconscious: { lessons: ['a'] },
+      unconscious: { breakthroughs: ['b'] }, // unresolvedTraumas omitted here, so newTraumas is expected to be 0
+    };
+    const result = calculateMemoryDelta(before, after);
+    assert.deepStrictEqual(result, {
+      newLessons: 1,
+      newBreakthroughs: 1,
+      newTraumas: 0,
+    });
+  });
+});