@machinespirits/eval 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +161 -0
  3. package/config/eval-settings.yaml +18 -0
  4. package/config/evaluation-rubric-learner.yaml +277 -0
  5. package/config/evaluation-rubric.yaml +613 -0
  6. package/config/interaction-eval-scenarios.yaml +93 -50
  7. package/config/learner-agents.yaml +124 -193
  8. package/config/machinespirits-eval.code-workspace +11 -0
  9. package/config/providers.yaml +60 -0
  10. package/config/suggestion-scenarios.yaml +1399 -0
  11. package/config/tutor-agents.yaml +716 -0
  12. package/docs/EVALUATION-VARIABLES.md +589 -0
  13. package/docs/REPLICATION-PLAN.md +577 -0
  14. package/index.js +15 -6
  15. package/package.json +16 -22
  16. package/routes/evalRoutes.js +88 -36
  17. package/scripts/analyze-judge-reliability.js +401 -0
  18. package/scripts/analyze-run.js +97 -0
  19. package/scripts/analyze-run.mjs +282 -0
  20. package/scripts/analyze-validation-failures.js +141 -0
  21. package/scripts/check-run.mjs +17 -0
  22. package/scripts/code-impasse-strategies.js +1132 -0
  23. package/scripts/compare-runs.js +44 -0
  24. package/scripts/compare-suggestions.js +80 -0
  25. package/scripts/compare-transformation.js +116 -0
  26. package/scripts/dig-into-run.js +158 -0
  27. package/scripts/eval-cli.js +2626 -0
  28. package/scripts/generate-paper-figures.py +452 -0
  29. package/scripts/qualitative-analysis-ai.js +1313 -0
  30. package/scripts/qualitative-analysis.js +688 -0
  31. package/scripts/seed-db.js +87 -0
  32. package/scripts/show-failed-suggestions.js +64 -0
  33. package/scripts/validate-content.js +192 -0
  34. package/server.js +3 -2
  35. package/services/__tests__/evalConfigLoader.test.js +338 -0
  36. package/services/anovaStats.js +499 -0
  37. package/services/contentResolver.js +407 -0
  38. package/services/dialogueTraceAnalyzer.js +454 -0
  39. package/services/evalConfigLoader.js +625 -0
  40. package/services/evaluationRunner.js +2171 -270
  41. package/services/evaluationStore.js +564 -29
  42. package/services/learnerConfigLoader.js +75 -5
  43. package/services/learnerRubricEvaluator.js +284 -0
  44. package/services/learnerTutorInteractionEngine.js +375 -0
  45. package/services/processUtils.js +18 -0
  46. package/services/progressLogger.js +98 -0
  47. package/services/promptRecommendationService.js +31 -26
  48. package/services/promptRewriter.js +427 -0
  49. package/services/rubricEvaluator.js +543 -70
  50. package/services/streamingReporter.js +104 -0
  51. package/services/turnComparisonAnalyzer.js +494 -0
  52. package/components/MobileEvalDashboard.tsx +0 -267
  53. package/components/comparison/DeltaAnalysisTable.tsx +0 -137
  54. package/components/comparison/ProfileComparisonCard.tsx +0 -176
  55. package/components/comparison/RecognitionABMode.tsx +0 -385
  56. package/components/comparison/RecognitionMetricsPanel.tsx +0 -135
  57. package/components/comparison/WinnerIndicator.tsx +0 -64
  58. package/components/comparison/index.ts +0 -5
  59. package/components/mobile/BottomSheet.tsx +0 -233
  60. package/components/mobile/DimensionBreakdown.tsx +0 -210
  61. package/components/mobile/DocsView.tsx +0 -363
  62. package/components/mobile/LogsView.tsx +0 -481
  63. package/components/mobile/PsychodynamicQuadrant.tsx +0 -261
  64. package/components/mobile/QuickTestView.tsx +0 -1098
  65. package/components/mobile/RecognitionTypeChart.tsx +0 -124
  66. package/components/mobile/RecognitionView.tsx +0 -809
  67. package/components/mobile/RunDetailView.tsx +0 -261
  68. package/components/mobile/RunHistoryView.tsx +0 -367
  69. package/components/mobile/ScoreRadial.tsx +0 -211
  70. package/components/mobile/StreamingLogPanel.tsx +0 -230
  71. package/components/mobile/SynthesisStrategyChart.tsx +0 -140
  72. package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +0 -52
  73. package/docs/research/ABLATION-MODEL-SELECTION.md +0 -53
  74. package/docs/research/ADVANCED-EVAL-ANALYSIS.md +0 -60
  75. package/docs/research/ANOVA-RESULTS-2026-01-14.md +0 -257
  76. package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +0 -586
  77. package/docs/research/COST-ANALYSIS.md +0 -56
  78. package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +0 -340
  79. package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +0 -291
  80. package/docs/research/EVAL-SYSTEM-ANALYSIS.md +0 -306
  81. package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +0 -301
  82. package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +0 -1988
  83. package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +0 -282
  84. package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +0 -147
  85. package/docs/research/PAPER-EXTENSION-DYADIC.md +0 -204
  86. package/docs/research/PAPER-UNIFIED.md +0 -659
  87. package/docs/research/PAPER-UNIFIED.pdf +0 -0
  88. package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +0 -356
  89. package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +0 -419
  90. package/docs/research/apa.csl +0 -2133
  91. package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +0 -1637
  92. package/docs/research/archive/paper-multiagent-tutor.tex +0 -978
  93. package/docs/research/paper-draft/full-paper.md +0 -136
  94. package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
  95. package/docs/research/paper-draft/references.bib +0 -515
  96. package/docs/research/transcript-baseline.md +0 -139
  97. package/docs/research/transcript-recognition-multiagent.md +0 -187
  98. package/hooks/useEvalData.ts +0 -625
  99. package/server-init.js +0 -45
  100. package/services/benchmarkService.js +0 -1892
  101. package/types.ts +0 -165
  102. package/utils/haptics.ts +0 -45
@@ -0,0 +1,104 @@
1
+ /**
2
+ * Streaming Reporter — console progress output during eval `run`.
3
+ *
4
+ * Shows a progress bar, per-test result lines, scenario summaries,
5
+ * and a final run summary. Always active (not gated on --verbose).
6
+ */
7
+
8
// Number of character cells in the rendered progress bar.
const BAR_WIDTH = 20;

/**
 * Render a fixed-width text progress bar followed by a whole-number percentage.
 *
 * @param {number} completed - Number of finished items.
 * @param {number} total - Total number of items; a value <= 0 renders as 0%.
 * @returns {string} e.g. "[██████████░░░░░░░░░░] 50%"
 */
function progressBar(completed, total) {
  const ratio = total > 0 ? completed / total : 0;
  // Clamp to [0, 1]: an over-count (completed > total) would otherwise make
  // `empty` negative, and String.prototype.repeat throws RangeError on a
  // negative count.
  const pct = Number.isNaN(ratio) ? 0 : Math.min(1, Math.max(0, ratio));
  const filled = Math.round(pct * BAR_WIDTH);
  const empty = BAR_WIDTH - filled;
  const bar = '\u2588'.repeat(filled) + '\u2591'.repeat(empty);
  return `[${bar}] ${Math.round(pct * 100)}%`;
}
17
+
18
/**
 * Format a duration in milliseconds as a short human-readable string.
 *
 * - below 1000 ms: "<ms>ms"
 * - below one minute: seconds with one decimal, e.g. "7.3s"
 * - otherwise: "<m>m <s>s"
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Formatted duration.
 */
function formatMs(ms) {
  if (ms < 1000) return `${ms}ms`;
  if (ms < 60000) {
    const seconds = (ms / 1000).toFixed(1);
    // Values just under a minute round up to "60.0"; let those fall through
    // to the minute format instead of printing "60.0s".
    if (seconds !== '60.0') return `${seconds}s`;
  }
  // Round to whole seconds BEFORE splitting into minutes and seconds so a
  // remainder that rounds up to 60 carries into the minutes ("2m 0s"
  // rather than "1m 60s").
  const totalSeconds = Math.round(ms / 1000);
  const m = Math.floor(totalSeconds / 60);
  const s = totalSeconds % 60;
  return `${m}m ${s}s`;
}
25
+
26
/**
 * Estimate the remaining run time from the average time per completed test.
 *
 * @param {number} completedCount - Tests finished so far.
 * @param {number} totalTests - Total tests expected in the run.
 * @param {number} elapsedMs - Milliseconds elapsed since the run started.
 * @returns {string} Formatted estimate, or '?' when nothing has completed yet.
 */
function formatEta(completedCount, totalTests, elapsedMs) {
  // No completed tests means no average to extrapolate from.
  if (completedCount === 0) return '?';
  const avgMs = elapsedMs / completedCount;
  // Clamp at zero so an over-count (completedCount > totalTests) cannot
  // produce a negative ETA.
  const remainingTests = Math.max(0, totalTests - completedCount);
  return formatMs(Math.round(avgMs * remainingTests));
}
32
+
33
/**
 * Prints evaluation progress to the console: one line per finished test,
 * a banner per finished scenario, and a final run summary.
 *
 * The reporter only formats and logs; values that cannot be formatted
 * (non-numeric scores, non-string error messages) are rendered as
 * placeholders instead of throwing.
 */
export class StreamingReporter {
  /**
   * @param {Object} options
   * @param {number} options.totalTests - Denominator for the progress bar and ETA.
   * @param {number} options.totalScenarios - Stored on the instance as-is.
   * @param {*} options.profiles - Stored on the instance as-is.
   * @param {*} options.scenarios - Stored on the instance as-is.
   */
  constructor({ totalTests, totalScenarios, profiles, scenarios }) {
    this.totalTests = totalTests;
    this.totalScenarios = totalScenarios;
    this.profiles = profiles;
    this.scenarios = scenarios;
    this.completedCount = 0;
    // Reference point for elapsed-time and ETA calculations.
    this.startTime = Date.now();
  }

  /**
   * Called after each test completes.
   * Prints: [████░░] 42% 10/24 | ✓ 85.5 | budget | New User | 7.3s | ETA 4m 12s
   *
   * @param {Object} result - Test result; reads overallScore, success,
   *   profileName, scenarioName, scenarioId and latencyMs.
   */
  onTestComplete(result) {
    this.completedCount++;
    const elapsed = Date.now() - this.startTime;
    const bar = progressBar(this.completedCount, this.totalTests);
    const count = `${this.completedCount}/${this.totalTests}`;
    // Only call toFixed on real numbers; anything else shows the placeholder
    // rather than throwing a TypeError in the middle of a run.
    const score = Number.isFinite(result.overallScore) ? result.overallScore.toFixed(1) : ' -- ';
    const status = result.success ? '\u2713' : '\u2717';
    const profile = result.profileName || '';
    const scenario = result.scenarioName || result.scenarioId || '';
    const latency = result.latencyMs ? formatMs(result.latencyMs) : '';
    const eta = formatEta(this.completedCount, this.totalTests, elapsed);

    console.log(
      `${bar} ${count} | ${status} ${score} | ${profile} | ${scenario} | ${latency} | ETA ${eta}`
    );
  }

  /**
   * Called when a test errors. Counts toward progress like a completed test.
   *
   * @param {Object} info
   * @param {string} [info.scenarioName]
   * @param {string} [info.profileName]
   * @param {*} [info.errorMessage] - Truncated to 60 characters for display.
   */
  onTestError({ scenarioName, profileName, errorMessage }) {
    this.completedCount++;
    const elapsed = Date.now() - this.startTime;
    const bar = progressBar(this.completedCount, this.totalTests);
    const count = `${this.completedCount}/${this.totalTests}`;
    const eta = formatEta(this.completedCount, this.totalTests, elapsed);
    // String() so an Error object (or any non-string) can be truncated safely.
    const errShort = String(errorMessage || 'unknown error').slice(0, 60);

    console.log(
      `${bar} ${count} | \u2717 ERROR | ${profileName || ''} | ${scenarioName || ''} | ${errShort} | ETA ${eta}`
    );
  }

  /**
   * Called when all profiles for a scenario are done.
   *
   * @param {Object} info
   * @param {string} info.scenarioName
   * @param {number|null} [info.avgScore] - Shown with one decimal; '--' when not a finite number.
   * @param {number} info.completedScenarios
   * @param {number} info.totalScenarios
   */
  onScenarioComplete({ scenarioName, avgScore, completedScenarios, totalScenarios }) {
    const scoreStr = Number.isFinite(avgScore) ? avgScore.toFixed(1) : '--';
    console.log(`${'─'.repeat(60)}`);
    console.log(` Scenario ${completedScenarios}/${totalScenarios} complete: ${scenarioName} avg=${scoreStr}`);
    console.log(`${'─'.repeat(60)}`);
  }

  /**
   * Called when the entire run finishes.
   *
   * @param {Object} summary
   * @param {number} summary.totalTests
   * @param {number} summary.successfulTests
   * @param {number} summary.failedTests
   * @param {number} summary.durationMs
   */
  onRunComplete({ totalTests, successfulTests, failedTests, durationMs }) {
    console.log('');
    console.log('═'.repeat(60));
    console.log('EVALUATION COMPLETE');
    console.log('═'.repeat(60));
    console.log(` Tests: ${successfulTests} passed, ${failedTests} failed, ${totalTests} total`);
    console.log(` Duration: ${formatMs(durationMs)}`);
    console.log('═'.repeat(60));
  }
}
103
+
104
+ export default { StreamingReporter };
@@ -0,0 +1,494 @@
1
+ /**
2
+ * Turn Comparison Analyzer Service
3
+ *
4
+ * Analyzes how tutor and learner positions evolve across multi-turn scenarios.
5
+ * Implements measurement of "mutual transformation" - the claim that both
6
+ * parties transform through genuine recognition-based dialogue.
7
+ *
8
+ * Theoretical basis: Hegel's recognition theory requires bilateral change.
9
+ * A tutor who maintains fixed positions while expecting learner transformation
10
+ * fails to achieve genuine recognition.
11
+ */
12
+
13
/**
 * Analyze how tutor responses evolve across turns in a multi-turn scenario.
 *
 * Combines per-dimension score trajectories, the sequence of suggestion
 * types, framing evolution, first-to-last score change, score convergence
 * and the tutor/learner change indices into one summary object.
 *
 * @param {Array} turnResults - Array of turn result objects. Reads `scores`,
 *   `suggestion` and `turnScore` from each turn.
 * @returns {Object} Progression analysis metrics. For empty or missing input
 *   every metric is null (or an empty container) and `turnCount` is 0.
 */
export function analyzeTurnProgression(turnResults) {
  if (!turnResults || turnResults.length === 0) {
    return {
      dimensionTrajectories: {},
      suggestionTypeProgression: [],
      framingEvolution: null,
      avgScoreImprovement: null,
      dimensionConvergence: null,
      adaptationIndex: null,
      learnerGrowthIndex: null,
      bilateralTransformationIndex: null,
      turnCount: 0,
    };
  }

  // Track dimension score trajectories; a missing score is recorded as null
  // so every trajectory has one entry per turn.
  const allDimensions = [
    'relevance', 'specificity', 'pedagogical', 'personalization',
    'actionability', 'tone', 'mutual_recognition', 'dialectical_responsiveness',
    'memory_integration', 'transformative_potential', 'tutor_adaptation', 'learner_growth',
  ];
  const dimensionTrajectories = {};
  for (const dim of allDimensions) {
    dimensionTrajectories[dim] = turnResults.map((t) => t.scores?.[dim] ?? null);
  }

  // Track suggestion type progression (e.g., lecture -> explore -> continue)
  const suggestionTypeProgression = turnResults
    .map((t) => t.suggestion?.type || t.suggestion?.action || 'unknown');

  // Analyze framing evolution
  const framingEvolution = analyzeFramingShift(turnResults);

  // Relative score change from the first to the last scored turn. Only
  // finite numeric scores count: a turn with no turnScore at all (undefined)
  // would otherwise pass a bare `!== null` check and turn the result into NaN.
  const validScores = turnResults
    .map((t) => t.turnScore)
    .filter((score) => Number.isFinite(score));

  let avgScoreImprovement = null;
  if (validScores.length >= 2) {
    const firstScore = validScores[0];
    const lastScore = validScores[validScores.length - 1];
    // A non-positive baseline has no meaningful relative change.
    avgScoreImprovement = firstScore > 0 ? (lastScore - firstScore) / firstScore : null;
  }

  // Calculate dimension convergence (do scores stabilize over time?)
  const dimensionConvergence = calculateConvergence(dimensionTrajectories);

  // Calculate bilateral adaptation indices; the bilateral index is their mean.
  const adaptationIndex = calculateAdaptationIndex(turnResults);
  const learnerGrowthIndex = calculateLearnerGrowthIndex(turnResults);
  const bilateralTransformationIndex = (adaptationIndex + learnerGrowthIndex) / 2;

  return {
    dimensionTrajectories,
    suggestionTypeProgression,
    framingEvolution,
    avgScoreImprovement,
    dimensionConvergence,
    adaptationIndex,
    learnerGrowthIndex,
    bilateralTransformationIndex,
    turnCount: turnResults.length,
  };
}
85
+
86
/**
 * Tutor adaptation index: the average amount by which the tutor's
 * suggestion changes from one turn to the next.
 *
 * Each pair of consecutive turns that both carry a suggestion is scored
 * with measureSuggestionShift; the index is the arithmetic mean of those
 * scores. A higher value means the tutor changed approach more between turns.
 *
 * @param {Array} turnResults - Array of turn result objects
 * @returns {number} Mean shift across comparable turn pairs; 0 when there
 *   are fewer than two turns or no pair has suggestions on both sides
 */
export function calculateAdaptationIndex(turnResults) {
  if (!turnResults || turnResults.length < 2) {
    return 0;
  }

  const pairShifts = [];
  for (let idx = 0; idx + 1 < turnResults.length; idx++) {
    const before = turnResults[idx].suggestion;
    const after = turnResults[idx + 1].suggestion;

    // A pair is only comparable when both turns produced a suggestion.
    if (before && after) {
      pairShifts.push(measureSuggestionShift(before, after));
    }
  }

  if (pairShifts.length === 0) {
    return 0;
  }

  let shiftSum = 0;
  for (const pairShift of pairShifts) {
    shiftSum += pairShift;
  }
  return shiftSum / pairShifts.length;
}
115
+
116
/**
 * Calculate the learner growth index - how much the learner's understanding evolves.
 *
 * For every pair of consecutive turns, up to two indicators are collected:
 * - the relative increase in learner-message complexity (as scored by
 *   measureMessageComplexity), clamped to [0, 1]; only counted when the
 *   earlier message has a complexity above 0
 * - the increase in the `learner_growth` dimension score divided by 5,
 *   floored at 0; only counted when both turns carry a numeric score
 *
 * The index is the mean of all collected indicators, capped at 1.
 *
 * @param {Array} turnResults - Array of turn result objects. Reads
 *   `learnerMessage` (falling back to `learnerAction`) and `scores.learner_growth`.
 * @returns {number} Growth index (0-1 scale); 0 when there are fewer than
 *   two turns or no indicator could be computed
 */
export function calculateLearnerGrowthIndex(turnResults) {
  if (!turnResults || turnResults.length < 2) return 0;

  let totalGrowth = 0;
  let indicators = 0;

  for (let i = 1; i < turnResults.length; i++) {
    const prev = turnResults[i - 1];
    const curr = turnResults[i];

    const prevMsg = prev.learnerMessage || prev.learnerAction || '';
    const currMsg = curr.learnerMessage || curr.learnerAction || '';

    // Simple heuristic: a rise in message complexity between consecutive
    // turns is taken as a sign of deeper engagement.
    const prevComplexity = measureMessageComplexity(prevMsg);
    const currComplexity = measureMessageComplexity(currMsg);

    // Relative growth needs a non-zero baseline.
    if (prevComplexity > 0) {
      const growth = (currComplexity - prevComplexity) / prevComplexity;
      totalGrowth += Math.max(0, Math.min(1, growth));
      indicators++;
    }

    // Check for learner_growth dimension scores if available. Require real
    // numbers: a null score would otherwise be coerced to 0 in the
    // subtraction and counted as a zero-growth indicator, diluting the index.
    const prevGrowthScore = prev.scores?.learner_growth;
    const currGrowthScore = curr.scores?.learner_growth;

    if (Number.isFinite(prevGrowthScore) && Number.isFinite(currGrowthScore)) {
      const scoreGrowth = (currGrowthScore - prevGrowthScore) / 5; // Normalize to 0-1
      totalGrowth += Math.max(0, scoreGrowth);
      indicators++;
    }
  }

  if (indicators === 0) return 0;
  return Math.min(1, totalGrowth / indicators);
}
173
+
174
/**
 * Measure the complexity of a learner message.
 * Higher complexity suggests deeper engagement.
 *
 * The score is the sum of:
 * - word count / 50, capped at 1
 * - 0.2 per question mark
 * - 0.15 per distinct connective present (because, therefore, ...)
 * - 0.25 per distinct self-revision marker present (wait, actually, ...)
 * - 0.2 per distinct reference to prior content present (earlier, you said, ...)
 *
 * Markers are matched case-insensitively as whole words or phrases.
 *
 * @param {string} message - The learner message
 * @returns {number} Complexity score (>= 0, no fixed upper bound); 0 for
 *   empty or non-string input
 */
function measureMessageComplexity(message) {
  if (!message || typeof message !== 'string') return 0;

  const text = message.toLowerCase();

  // Count how many of the given markers occur as whole words/phrases.
  // Word boundaries stop short markers such as "so", "if" or "oh" from
  // matching inside unrelated words ("also", "different", "cohort").
  // Markers are fixed lowercase literals without regex metacharacters.
  const countPresent = (markers) =>
    markers.filter((marker) => new RegExp(`\\b${marker}\\b`).test(text)).length;

  let score = 0;

  // Base: word count (normalized)
  const words = message.split(/\s+/).filter(Boolean);
  score += Math.min(1, words.length / 50);

  // Questions indicate inquiry
  const questionCount = (message.match(/\?/g) || []).length;
  score += questionCount * 0.2;

  // Connective words suggest reasoning
  const connectives = ['because', 'therefore', 'however', 'although', 'if', 'then', 'so', 'but'];
  score += countPresent(connectives) * 0.15;

  // Self-revision markers. All lowercase because they are compared against
  // the lowercased text (a capitalised "I see" could never match).
  const revisionMarkers = ['wait', 'actually', 'i see', 'oh', 'hmm', 'let me think'];
  score += countPresent(revisionMarkers) * 0.25;

  // References to prior content
  const priorRefs = ['earlier', 'before', 'you said', 'you mentioned', 'we discussed'];
  score += countPresent(priorRefs) * 0.2;

  return score;
}
211
+
212
/**
 * Measure how much a suggestion shifts from the previous one.
 * Considers type/action, action target, message wording and title.
 *
 * Contributions: 1 for a type or action change, 0.5 for an action target
 * change, (1 - Jaccard similarity of the message word sets) for wording,
 * and 0.3 for a title change. The sum is divided by the number of factors
 * (4), so the result ranges from 0 to 0.7.
 *
 * @param {Object} prev - Previous suggestion
 * @param {Object} curr - Current suggestion
 * @returns {number} Shift magnitude, between 0 and 0.7
 */
function measureSuggestionShift(prev, curr) {
  const FACTOR_COUNT = 4;
  let shift = 0;

  // Type/action change
  if (prev.type !== curr.type || prev.action !== curr.action) {
    shift += 1;
  }

  // Action target change
  if (prev.actionTarget !== curr.actionTarget) {
    shift += 0.5;
  }

  // Message content similarity (inverse Jaccard). Empty tokens are dropped:
  // splitting an empty or whitespace-padded string yields '' entries that
  // would otherwise count as a shared "word" and distort the similarity.
  const toWordSet = (text) => new Set((text || '').toLowerCase().split(/\s+/).filter(Boolean));
  const prevWords = toWordSet(prev.message);
  const currWords = toWordSet(curr.message);
  const unionSize = new Set([...prevWords, ...currWords]).size;

  // Two empty messages are identical and contribute no shift.
  if (unionSize > 0) {
    let sharedCount = 0;
    for (const word of prevWords) {
      if (currWords.has(word)) sharedCount++;
    }
    shift += 1 - sharedCount / unionSize; // More change = higher shift
  }

  // Title change
  if (prev.title !== curr.title) {
    shift += 0.3;
  }

  return shift / FACTOR_COUNT;
}
256
+
257
/**
 * Analyze how the tutor's framing evolves across turns.
 * Tracks movement between directive, exploratory, collaborative and
 * neutral framing, as classified by classifyFraming.
 *
 * @param {Array} turnResults - Array of turn result objects. Reads
 *   `suggestion.message` and `turnIndex` from each turn.
 * @returns {Object} Framing evolution analysis:
 *   - timeline: one `{ turnIndex, framing, confidence }` entry per turn
 *   - dominantShift: "first → last" framing when they differ, else null
 *   - framingDiversity: Shannon entropy of the framing counts, normalized to 0-1
 *   - framingCounts: number of turns per framing type
 */
export function analyzeFramingShift(turnResults) {
  const framingCounts = { directive: 0, exploratory: 0, collaborative: 0, neutral: 0 };

  if (!turnResults || turnResults.length === 0) {
    // Same shape as the populated result so callers can read framingCounts
    // without checking for its presence.
    return { timeline: [], dominantShift: null, framingDiversity: 0, framingCounts };
  }

  const timeline = [];
  for (const turn of turnResults) {
    const msg = turn.suggestion?.message || '';
    const framing = classifyFraming(msg);
    timeline.push({
      turnIndex: turn.turnIndex,
      framing,
      confidence: framing.confidence,
    });
    framingCounts[framing.type]++;
  }

  // Determine dominant shift pattern: compare the first and last turn only.
  let dominantShift = null;
  if (timeline.length >= 2) {
    const firstFraming = timeline[0].framing.type;
    const lastFraming = timeline[timeline.length - 1].framing.type;

    if (firstFraming !== lastFraming) {
      dominantShift = `${firstFraming} → ${lastFraming}`;
    }
  }

  // Calculate framing diversity as Shannon entropy over the framing types.
  const counts = Object.values(framingCounts);
  const total = counts.reduce((a, b) => a + b, 0);
  let diversity = 0;
  if (total > 0) {
    for (const count of counts) {
      if (count > 0) {
        const p = count / total;
        diversity -= p * Math.log2(p);
      }
    }
    // Normalize to 0-1 (max entropy is log2(4) = 2)
    diversity = diversity / 2;
  }

  return {
    timeline,
    dominantShift,
    framingDiversity: diversity,
    framingCounts,
  };
}
315
+
316
/**
 * Classify the framing style of a tutor message.
 *
 * Counts how many marker phrases of each style (directive, exploratory,
 * collaborative) occur in the message and picks the style with the most
 * hits. On a tie the earlier style in that order wins.
 *
 * @param {string} message - The tutor message
 * @returns {Object} Framing classification { type, confidence }:
 *   - empty or non-string input: { type: 'neutral', confidence: 0 }
 *   - no marker found: { type: 'neutral', confidence: 0.5 }
 *   - otherwise confidence is the winning style's share of all marker hits
 */
function classifyFraming(message) {
  if (!message || typeof message !== 'string') {
    return { type: 'neutral', confidence: 0 };
  }

  // Lowercase for case-insensitive matching, and fold typographic
  // apostrophes to ASCII so "let’s" matches the "let's" marker.
  const msg = message.toLowerCase().replace(/[\u2018\u2019]/g, "'");

  // All markers are lowercase because they are compared against lowercased
  // text (a capitalised "I wonder" could never match).
  const markersByType = {
    directive: ['you should', 'you need to', 'you must', 'the correct', 'the answer is',
      'let me explain', "here's what", 'first, you', 'make sure to'],
    exploratory: ['what if', 'have you considered', 'what do you think', 'how might',
      'could it be', 'i wonder', "let's explore", 'what would happen'],
    collaborative: ['together', "let's", 'we could', 'building on your',
      'your insight', "you've helped me", 'our conversation', 'co-create'],
  };

  const scores = { directive: 0, exploratory: 0, collaborative: 0, neutral: 0 };
  for (const [type, markers] of Object.entries(markersByType)) {
    scores[type] = markers.filter((marker) => msg.includes(marker)).length;
  }

  // Find dominant framing
  const maxScore = Math.max(...Object.values(scores));
  if (maxScore === 0) {
    return { type: 'neutral', confidence: 0.5 };
  }

  // find() returns the first entry with the top score, which gives the
  // directive > exploratory > collaborative tie-break.
  const [dominant] = Object.entries(scores).find(([, value]) => value === maxScore);
  const totalMarkers = Object.values(scores).reduce((a, b) => a + b, 0);

  return { type: dominant, confidence: maxScore / totalMarkers };
}
363
+
364
/**
 * Estimate how strongly dimension scores settle down as the dialogue goes on.
 *
 * For each dimension, null entries are dropped and the remaining scores are
 * split at the midpoint. The relative drop in variance from the first half
 * to the second half, clamped to [0, 1], is that dimension's contribution.
 * Dimensions with fewer than three non-null scores, or with zero variance
 * in the first half, are left out. The result is the mean contribution.
 *
 * @param {Object} trajectories - Map of dimension name to an array of per-turn scores
 * @returns {number|null} Convergence score (0-1), or null when `trajectories`
 *   is missing or no dimension could be measured
 */
function calculateConvergence(trajectories) {
  if (!trajectories) {
    return null;
  }

  const reductions = [];

  for (const series of Object.values(trajectories)) {
    const observed = series.filter((score) => score !== null);

    if (observed.length >= 3) {
      // Early versus late portion of the dialogue.
      const split = Math.floor(observed.length / 2);
      const earlyVariance = calculateVariance(observed.slice(0, split));
      const lateVariance = calculateVariance(observed.slice(split));

      // A relative drop is only defined for a non-zero starting variance.
      if (earlyVariance > 0) {
        const relativeDrop = (earlyVariance - lateVariance) / earlyVariance;
        reductions.push(Math.max(0, Math.min(1, relativeDrop)));
      }
    }
  }

  if (reductions.length === 0) {
    return null;
  }

  let reductionSum = 0;
  for (const reduction of reductions) {
    reductionSum += reduction;
  }
  return reductionSum / reductions.length;
}
399
+
400
/**
 * Population variance: the mean of the squared deviations from the mean.
 *
 * @param {Array<number>} values - Numbers to summarise
 * @returns {number} Variance; 0 for a missing or empty array
 */
function calculateVariance(values) {
  if (!values || values.length === 0) {
    return 0;
  }

  const sumOf = (numbers) => numbers.reduce((acc, n) => acc + n, 0);
  const count = values.length;
  const average = sumOf(values) / count;
  const squaredDeviations = values.map((value) => Math.pow(value - average, 2));

  // Divides by the element count (not count - 1).
  return sumOf(squaredDeviations) / count;
}
412
+
413
/**
 * Analyze transformation markers across a full dialogue.
 * Counts evolving vs static markers for both tutor and learner.
 *
 * A marker counts once per turn in which it appears (case-insensitive
 * substring match). Tutor markers are checked against `suggestion.message`;
 * learner markers against `learnerMessage`, falling back to
 * `action_details.message`.
 *
 * @param {Array} turnResults - Array of turn result objects
 * @param {Object} markerDefinitions - Marker lists: `tutorEvolving`,
 *   `tutorStatic`, `learnerEvolving`, `learnerStatic`. A missing or
 *   non-array list is treated as empty; blank or non-string markers are ignored.
 * @returns {Object} Bilateral transformation analysis:
 *   - the four marker counts
 *   - tutorTransformationRatio / learnerGrowthRatio: evolving / (evolving + static),
 *     or null when that party matched no markers
 *   - bilateralBalance: smaller ratio / larger ratio (1.0 = perfectly
 *     balanced), or null when either ratio is null or both are 0
 */
export function analyzeTransformationMarkers(turnResults, markerDefinitions) {
  if (!turnResults || !markerDefinitions) {
    return {
      tutorEvolvingCount: 0,
      tutorStaticCount: 0,
      learnerEvolvingCount: 0,
      learnerStaticCount: 0,
      tutorTransformationRatio: null,
      learnerGrowthRatio: null,
      bilateralBalance: null,
    };
  }

  // Normalise each marker list once, up front:
  // - a null/non-array list (e.g. a config key with no entries) becomes empty
  //   instead of breaking the loops below
  // - blank markers are dropped because ''.includes-style matching would
  //   make them match every message
  // - non-string markers are dropped because they cannot be lowercased
  const normalizeMarkers = (markers) =>
    (Array.isArray(markers) ? markers : [])
      .filter((marker) => typeof marker === 'string' && marker.trim() !== '')
      .map((marker) => marker.toLowerCase());

  const tutorEvolving = normalizeMarkers(markerDefinitions.tutorEvolving);
  const tutorStatic = normalizeMarkers(markerDefinitions.tutorStatic);
  const learnerEvolving = normalizeMarkers(markerDefinitions.learnerEvolving);
  const learnerStatic = normalizeMarkers(markerDefinitions.learnerStatic);

  // Number of markers from the list that occur in the (lowercased) text.
  const countMatches = (text, markers) => markers.filter((marker) => text.includes(marker)).length;

  let tutorEvolvingCount = 0;
  let tutorStaticCount = 0;
  let learnerEvolvingCount = 0;
  let learnerStaticCount = 0;

  for (const turn of turnResults) {
    // Check tutor message
    const tutorMsg = (turn.suggestion?.message || '').toLowerCase();
    tutorEvolvingCount += countMatches(tutorMsg, tutorEvolving);
    tutorStaticCount += countMatches(tutorMsg, tutorStatic);

    // Check learner message
    const learnerMsg = (turn.learnerMessage || turn.action_details?.message || '').toLowerCase();
    learnerEvolvingCount += countMatches(learnerMsg, learnerEvolving);
    learnerStaticCount += countMatches(learnerMsg, learnerStatic);
  }

  // Calculate ratios
  const tutorTotal = tutorEvolvingCount + tutorStaticCount;
  const learnerTotal = learnerEvolvingCount + learnerStaticCount;

  const tutorTransformationRatio = tutorTotal > 0 ? tutorEvolvingCount / tutorTotal : null;
  const learnerGrowthRatio = learnerTotal > 0 ? learnerEvolvingCount / learnerTotal : null;

  // Bilateral balance: how symmetric is the transformation?
  // 1.0 = perfectly balanced, 0.0 = completely asymmetric
  let bilateralBalance = null;
  if (tutorTransformationRatio !== null && learnerGrowthRatio !== null) {
    const maxRatio = Math.max(tutorTransformationRatio, learnerGrowthRatio);
    const minRatio = Math.min(tutorTransformationRatio, learnerGrowthRatio);
    // When both ratios are 0 there is no transformation to compare.
    bilateralBalance = maxRatio > 0 ? minRatio / maxRatio : null;
  }

  return {
    tutorEvolvingCount,
    tutorStaticCount,
    learnerEvolvingCount,
    learnerStaticCount,
    tutorTransformationRatio,
    learnerGrowthRatio,
    bilateralBalance,
  };
}
487
+
488
+ export default {
489
+ analyzeTurnProgression,
490
+ calculateAdaptationIndex,
491
+ calculateLearnerGrowthIndex,
492
+ analyzeFramingShift,
493
+ analyzeTransformationMarkers,
494
+ };