@machinespirits/eval 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +161 -0
- package/config/eval-settings.yaml +18 -0
- package/config/evaluation-rubric-learner.yaml +277 -0
- package/config/evaluation-rubric.yaml +613 -0
- package/config/interaction-eval-scenarios.yaml +93 -50
- package/config/learner-agents.yaml +124 -193
- package/config/machinespirits-eval.code-workspace +11 -0
- package/config/providers.yaml +60 -0
- package/config/suggestion-scenarios.yaml +1399 -0
- package/config/tutor-agents.yaml +716 -0
- package/docs/EVALUATION-VARIABLES.md +589 -0
- package/docs/REPLICATION-PLAN.md +577 -0
- package/index.js +15 -6
- package/package.json +16 -22
- package/routes/evalRoutes.js +88 -36
- package/scripts/analyze-judge-reliability.js +401 -0
- package/scripts/analyze-run.js +97 -0
- package/scripts/analyze-run.mjs +282 -0
- package/scripts/analyze-validation-failures.js +141 -0
- package/scripts/check-run.mjs +17 -0
- package/scripts/code-impasse-strategies.js +1132 -0
- package/scripts/compare-runs.js +44 -0
- package/scripts/compare-suggestions.js +80 -0
- package/scripts/compare-transformation.js +116 -0
- package/scripts/dig-into-run.js +158 -0
- package/scripts/eval-cli.js +2626 -0
- package/scripts/generate-paper-figures.py +452 -0
- package/scripts/qualitative-analysis-ai.js +1313 -0
- package/scripts/qualitative-analysis.js +688 -0
- package/scripts/seed-db.js +87 -0
- package/scripts/show-failed-suggestions.js +64 -0
- package/scripts/validate-content.js +192 -0
- package/server.js +3 -2
- package/services/__tests__/evalConfigLoader.test.js +338 -0
- package/services/anovaStats.js +499 -0
- package/services/contentResolver.js +407 -0
- package/services/dialogueTraceAnalyzer.js +454 -0
- package/services/evalConfigLoader.js +625 -0
- package/services/evaluationRunner.js +2171 -270
- package/services/evaluationStore.js +564 -29
- package/services/learnerConfigLoader.js +75 -5
- package/services/learnerRubricEvaluator.js +284 -0
- package/services/learnerTutorInteractionEngine.js +375 -0
- package/services/processUtils.js +18 -0
- package/services/progressLogger.js +98 -0
- package/services/promptRecommendationService.js +31 -26
- package/services/promptRewriter.js +427 -0
- package/services/rubricEvaluator.js +543 -70
- package/services/streamingReporter.js +104 -0
- package/services/turnComparisonAnalyzer.js +494 -0
- package/components/MobileEvalDashboard.tsx +0 -267
- package/components/comparison/DeltaAnalysisTable.tsx +0 -137
- package/components/comparison/ProfileComparisonCard.tsx +0 -176
- package/components/comparison/RecognitionABMode.tsx +0 -385
- package/components/comparison/RecognitionMetricsPanel.tsx +0 -135
- package/components/comparison/WinnerIndicator.tsx +0 -64
- package/components/comparison/index.ts +0 -5
- package/components/mobile/BottomSheet.tsx +0 -233
- package/components/mobile/DimensionBreakdown.tsx +0 -210
- package/components/mobile/DocsView.tsx +0 -363
- package/components/mobile/LogsView.tsx +0 -481
- package/components/mobile/PsychodynamicQuadrant.tsx +0 -261
- package/components/mobile/QuickTestView.tsx +0 -1098
- package/components/mobile/RecognitionTypeChart.tsx +0 -124
- package/components/mobile/RecognitionView.tsx +0 -809
- package/components/mobile/RunDetailView.tsx +0 -261
- package/components/mobile/RunHistoryView.tsx +0 -367
- package/components/mobile/ScoreRadial.tsx +0 -211
- package/components/mobile/StreamingLogPanel.tsx +0 -230
- package/components/mobile/SynthesisStrategyChart.tsx +0 -140
- package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +0 -52
- package/docs/research/ABLATION-MODEL-SELECTION.md +0 -53
- package/docs/research/ADVANCED-EVAL-ANALYSIS.md +0 -60
- package/docs/research/ANOVA-RESULTS-2026-01-14.md +0 -257
- package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +0 -586
- package/docs/research/COST-ANALYSIS.md +0 -56
- package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +0 -340
- package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +0 -291
- package/docs/research/EVAL-SYSTEM-ANALYSIS.md +0 -306
- package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +0 -301
- package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +0 -1988
- package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +0 -282
- package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +0 -147
- package/docs/research/PAPER-EXTENSION-DYADIC.md +0 -204
- package/docs/research/PAPER-UNIFIED.md +0 -659
- package/docs/research/PAPER-UNIFIED.pdf +0 -0
- package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +0 -356
- package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +0 -419
- package/docs/research/apa.csl +0 -2133
- package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +0 -1637
- package/docs/research/archive/paper-multiagent-tutor.tex +0 -978
- package/docs/research/paper-draft/full-paper.md +0 -136
- package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
- package/docs/research/paper-draft/references.bib +0 -515
- package/docs/research/transcript-baseline.md +0 -139
- package/docs/research/transcript-recognition-multiagent.md +0 -187
- package/hooks/useEvalData.ts +0 -625
- package/server-init.js +0 -45
- package/services/benchmarkService.js +0 -1892
- package/types.ts +0 -165
- package/utils/haptics.ts +0 -45
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Streaming Reporter — console progress output during eval `run`.
|
|
3
|
+
*
|
|
4
|
+
* Shows a progress bar, per-test result lines, scenario summaries,
|
|
5
|
+
* and a final run summary. Always active (not gated on --verbose).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/** Number of character cells in the rendered progress bar. */
const BAR_WIDTH = 20;

/**
 * Render a fixed-width text progress bar followed by a whole-number percentage.
 *
 * The completed/total ratio is clamped to [0, 1]. Without the clamp, an
 * over-count (completed > total) or a negative count produces a negative
 * argument to String.prototype.repeat, which throws a RangeError and would
 * abort the progress output.
 *
 * @param {number} completed - Number of finished items
 * @param {number} total - Expected number of items; values <= 0 render as 0%
 * @returns {string} Bar and percentage, e.g. "[██████████░░░░░░░░░░] 50%"
 */
function progressBar(completed, total) {
  const rawRatio = total > 0 ? completed / total : 0;
  // Non-finite ratios (NaN / Infinity inputs) are rendered as an empty bar.
  const ratio = Number.isFinite(rawRatio) ? Math.min(1, Math.max(0, rawRatio)) : 0;
  const filled = Math.round(ratio * BAR_WIDTH);
  const bar = '\u2588'.repeat(filled) + '\u2591'.repeat(BAR_WIDTH - filled);
  return `[${bar}] ${Math.round(ratio * 100)}%`;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Format a millisecond duration as a short human-readable string.
 *
 *   < 1s   -> "<ms>ms"
 *   < 1min -> "<seconds with one decimal>s"
 *   else   -> "<minutes>m <seconds>s"
 *
 * Rounding is done on the total number of seconds so the seconds field can
 * never display as 60 (e.g. 119600ms is "2m 0s", not "1m 60s"), and values
 * just under one minute that would round to "60.0s" roll over to "1m 0s".
 *
 * @param {number} ms - Duration in milliseconds
 * @returns {string} Formatted duration
 */
function formatMs(ms) {
  if (ms < 1000) return `${ms}ms`;

  if (ms < 60000) {
    const seconds = (ms / 1000).toFixed(1);
    // 59950..59999ms round up to "60.0"; fall through to the minute format instead.
    if (seconds !== '60.0') return `${seconds}s`;
  }

  const totalSeconds = Math.round(ms / 1000);
  const minutes = Math.floor(totalSeconds / 60);
  return `${minutes}m ${totalSeconds % 60}s`;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Estimate the remaining run time from the average time per completed test.
 *
 * @param {number} completedCount - Tests finished so far
 * @param {number} totalTests - Expected total number of tests
 * @param {number} elapsedMs - Milliseconds elapsed since the run started
 * @returns {string} Formatted remaining time, or '?' when nothing has completed yet
 */
function formatEta(completedCount, totalTests, elapsedMs) {
  // No completed tests means no average to extrapolate from.
  if (!(completedCount > 0)) return '?';

  const avgMs = elapsedMs / completedCount;
  // Clamp so an over-count (completed > total) reports "0ms" instead of a negative ETA.
  const remainingTests = Math.max(0, totalTests - completedCount);
  return formatMs(Math.round(avgMs * remainingTests));
}
|
|
32
|
+
|
|
33
|
+
/**
 * Console reporter that prints one progress line per finished test, a banner
 * per finished scenario, and a final run summary.
 */
export class StreamingReporter {
  /**
   * @param {Object} options
   * @param {number} options.totalTests - Expected number of tests in the run
   * @param {number} options.totalScenarios - Expected number of scenarios
   * @param {*} options.profiles - Stored as-is; not read by this class
   * @param {*} options.scenarios - Stored as-is; not read by this class
   */
  constructor({ totalTests, totalScenarios, profiles, scenarios }) {
    this.totalTests = totalTests;
    this.totalScenarios = totalScenarios;
    this.profiles = profiles;
    this.scenarios = scenarios;
    this.completedCount = 0;
    this.startTime = Date.now();
  }

  /**
   * Count one more finished test and build the shared line prefix/suffix parts.
   *
   * @returns {{bar: string, count: string, eta: string}}
   */
  #advance() {
    this.completedCount++;
    const elapsed = Date.now() - this.startTime;
    return {
      bar: progressBar(this.completedCount, this.totalTests),
      count: `${this.completedCount}/${this.totalTests}`,
      eta: formatEta(this.completedCount, this.totalTests, elapsed),
    };
  }

  /**
   * Called after each test finishes with a result (passing or failing).
   * Prints: [████░░] 42% 10/24 | ✓ 85.5 | budget | New User | 7333ms | ETA 4m 12s
   *
   * @param {Object} result - Reads overallScore, success, profileName,
   *   scenarioName / scenarioId and latencyMs
   */
  onTestComplete(result) {
    const { bar, count, eta } = this.#advance();
    const score = result.overallScore != null ? result.overallScore.toFixed(1) : ' -- ';
    const status = result.success ? '\u2713' : '\u2717';
    const profile = result.profileName || '';
    const scenario = result.scenarioName || result.scenarioId || '';
    // Null check rather than truthiness so a reported latency of 0 is still shown.
    const latency = result.latencyMs != null ? formatMs(result.latencyMs) : '';

    console.log(
      `${bar} ${count} | ${status} ${score} | ${profile} | ${scenario} | ${latency} | ETA ${eta}`
    );
  }

  /**
   * Called when a test errors. The error text is truncated to 60 characters.
   *
   * @param {Object} info
   * @param {string} [info.scenarioName]
   * @param {string} [info.profileName]
   * @param {*} [info.errorMessage] - Coerced to a string before truncation
   */
  onTestError({ scenarioName, profileName, errorMessage }) {
    const { bar, count, eta } = this.#advance();
    // String() guards against a non-string value (e.g. an Error object) lacking .slice.
    const errShort = String(errorMessage || 'unknown error').slice(0, 60);

    console.log(
      `${bar} ${count} | \u2717 ERROR | ${profileName || ''} | ${scenarioName || ''} | ${errShort} | ETA ${eta}`
    );
  }

  /**
   * Called when all profiles for a scenario are done.
   *
   * @param {Object} info
   * @param {string} info.scenarioName
   * @param {number|null} [info.avgScore] - Printed with one decimal, or '--' when absent
   * @param {number} info.completedScenarios
   * @param {number} info.totalScenarios
   */
  onScenarioComplete({ scenarioName, avgScore, completedScenarios, totalScenarios }) {
    const scoreStr = avgScore != null ? avgScore.toFixed(1) : '--';
    const rule = '─'.repeat(60);
    console.log(rule);
    console.log(` Scenario ${completedScenarios}/${totalScenarios} complete: ${scenarioName} avg=${scoreStr}`);
    console.log(rule);
  }

  /**
   * Called when the entire run finishes.
   *
   * @param {Object} summary
   * @param {number} summary.totalTests
   * @param {number} summary.successfulTests
   * @param {number} summary.failedTests
   * @param {number} summary.durationMs
   */
  onRunComplete({ totalTests, successfulTests, failedTests, durationMs }) {
    const rule = '═'.repeat(60);
    console.log('');
    console.log(rule);
    console.log('EVALUATION COMPLETE');
    console.log(rule);
    console.log(` Tests: ${successfulTests} passed, ${failedTests} failed, ${totalTests} total`);
    console.log(` Duration: ${formatMs(durationMs)}`);
    console.log(rule);
  }
}

export default { StreamingReporter };
|
|
@@ -0,0 +1,494 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Turn Comparison Analyzer Service
|
|
3
|
+
*
|
|
4
|
+
* Analyzes how tutor and learner positions evolve across multi-turn scenarios.
|
|
5
|
+
* Implements measurement of "mutual transformation" - the claim that both
|
|
6
|
+
* parties transform through genuine recognition-based dialogue.
|
|
7
|
+
*
|
|
8
|
+
* Theoretical basis: Hegel's recognition theory requires bilateral change.
|
|
9
|
+
* A tutor who maintains fixed positions while expecting learner transformation
|
|
10
|
+
* fails to achieve genuine recognition.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Analyze how tutor responses evolve across turns in a multi-turn scenario.
 *
 * Collects per-dimension score trajectories, the sequence of suggestion
 * types, the framing evolution, the relative change in turn score from the
 * first scored turn to the last, a convergence measure, and the tutor /
 * learner / bilateral transformation indices.
 *
 * @param {Array} turnResults - Array of turn result objects from runMultiTurnTest
 * @returns {Object} Progression analysis metrics. For empty or missing input
 *   every metric is null/empty and turnCount is 0.
 */
export function analyzeTurnProgression(turnResults) {
  if (!turnResults || turnResults.length === 0) {
    return {
      dimensionTrajectories: {},
      suggestionTypeProgression: [],
      framingEvolution: null,
      avgScoreImprovement: null,
      dimensionConvergence: null,
      adaptationIndex: null,
      learnerGrowthIndex: null,
      bilateralTransformationIndex: null,
      turnCount: 0,
    };
  }

  // Track dimension score trajectories; a missing score is recorded as null.
  const allDimensions = [
    'relevance', 'specificity', 'pedagogical', 'personalization',
    'actionability', 'tone', 'mutual_recognition', 'dialectical_responsiveness',
    'memory_integration', 'transformative_potential', 'tutor_adaptation', 'learner_growth',
  ];
  const dimensionTrajectories = {};
  for (const dim of allDimensions) {
    dimensionTrajectories[dim] = turnResults.map((t) => t.scores?.[dim] ?? null);
  }

  // Track suggestion type progression (e.g., lecture -> explore -> continue)
  const suggestionTypeProgression = turnResults
    .map((t) => t.suggestion?.type || t.suggestion?.action || 'unknown');

  // Analyze framing evolution
  const framingEvolution = analyzeFramingShift(turnResults);

  // Relative score change from the first scored turn to the last.
  // `!= null` excludes undefined as well as null: an undefined turnScore
  // would otherwise leak into the arithmetic and yield NaN or a spurious null.
  const validScores = turnResults
    .filter((t) => t.turnScore != null)
    .map((t) => t.turnScore);

  let avgScoreImprovement = null;
  if (validScores.length >= 2) {
    const firstScore = validScores[0];
    const lastScore = validScores[validScores.length - 1];
    // A non-positive baseline has no meaningful relative change.
    avgScoreImprovement = firstScore > 0 ? (lastScore - firstScore) / firstScore : null;
  }

  // Calculate dimension convergence (do scores stabilize over time?)
  const dimensionConvergence = calculateConvergence(dimensionTrajectories);

  // Calculate bilateral adaptation indices; the bilateral index is their mean.
  const adaptationIndex = calculateAdaptationIndex(turnResults);
  const learnerGrowthIndex = calculateLearnerGrowthIndex(turnResults);
  const bilateralTransformationIndex = (adaptationIndex + learnerGrowthIndex) / 2;

  return {
    dimensionTrajectories,
    suggestionTypeProgression,
    framingEvolution,
    avgScoreImprovement,
    dimensionConvergence,
    adaptationIndex,
    learnerGrowthIndex,
    bilateralTransformationIndex,
    turnCount: turnResults.length,
  };
}
|
|
85
|
+
|
|
86
|
+
/**
 * Tutor adaptation index: the mean suggestion shift between consecutive turns.
 *
 * A higher value means the tutor's suggestion changed more from one turn to
 * the next; a lower value means the suggestion stayed largely the same.
 * Pairs in which either turn has no suggestion are left out of the mean.
 *
 * @param {Array} turnResults - Array of turn result objects
 * @returns {number} Mean of measureSuggestionShift over comparable
 *   consecutive pairs, or 0 when there are fewer than two turns or no
 *   comparable pair
 */
export function calculateAdaptationIndex(turnResults) {
  if (!turnResults || turnResults.length < 2) return 0;

  const pairShifts = [];
  for (let idx = 1; idx < turnResults.length; idx++) {
    const earlier = turnResults[idx - 1].suggestion;
    const later = turnResults[idx].suggestion;
    if (earlier && later) {
      pairShifts.push(measureSuggestionShift(earlier, later));
    }
  }

  if (pairShifts.length === 0) return 0;

  const shiftSum = pairShifts.reduce((sum, value) => sum + value, 0);
  return shiftSum / pairShifts.length;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Calculate the learner growth index - how much the learner's contributions
 * develop from one turn to the next.
 *
 * For each consecutive pair of turns, up to two indicators are collected:
 *  - relative increase in heuristic message complexity (clamped to [0, 1]),
 *    skipped when the earlier message has zero complexity;
 *  - increase in the `learner_growth` dimension score divided by 5
 *    (presumably a 5-point rubric scale — confirm against the rubric),
 *    counted only when both turns carry a numeric score.
 * Decreases contribute 0 rather than a negative value.
 *
 * @param {Array} turnResults - Array of turn result objects
 * @returns {number} Mean of the collected indicators capped at 1, or 0 when
 *   there are fewer than two turns or no indicator could be computed
 */
export function calculateLearnerGrowthIndex(turnResults) {
  if (!turnResults || turnResults.length < 2) return 0;

  let totalGrowth = 0;
  let indicators = 0;

  for (let i = 1; i < turnResults.length; i++) {
    const prev = turnResults[i - 1];
    const curr = turnResults[i];

    // Indicator 1: growth in message complexity (length, questions,
    // connectives, self-revision markers, references to prior content).
    const prevComplexity = measureMessageComplexity(prev.learnerMessage || prev.learnerAction || '');
    const currComplexity = measureMessageComplexity(curr.learnerMessage || curr.learnerAction || '');

    if (prevComplexity > 0) {
      const growth = (currComplexity - prevComplexity) / prevComplexity;
      totalGrowth += Math.max(0, Math.min(1, growth));
      indicators++;
    }

    // Indicator 2: change in the learner_growth dimension score.
    // Require real numbers: a null score would otherwise coerce to 0 in the
    // subtraction and be counted as an indicator, diluting the average.
    const prevGrowthScore = prev.scores?.learner_growth;
    const currGrowthScore = curr.scores?.learner_growth;

    if (typeof prevGrowthScore === 'number' && typeof currGrowthScore === 'number') {
      const scoreGrowth = (currGrowthScore - prevGrowthScore) / 5;
      totalGrowth += Math.max(0, scoreGrowth);
      indicators++;
    }
  }

  if (indicators === 0) return 0;
  return Math.min(1, totalGrowth / indicators);
}
|
|
173
|
+
|
|
174
|
+
/**
 * Heuristic complexity score for a learner message.
 * Higher complexity suggests deeper engagement.
 *
 * Score = min(1, words / 50)
 *       + 0.20 per '?' character
 *       + 0.15 per distinct connective present
 *       + 0.25 per distinct self-revision marker present
 *       + 0.20 per distinct prior-content reference present
 *
 * Markers are matched case-insensitively as whole words/phrases, so "so" does
 * not fire inside "also" and "if" does not fire inside "different". All
 * marker strings are lowercase because they are tested against lowercased
 * text (a capitalised marker such as 'I see' could never match).
 *
 * @param {string} message - The learner message
 * @returns {number} Complexity score (0 for empty or non-string input; not capped)
 */
function measureMessageComplexity(message) {
  if (!message || typeof message !== 'string') return 0;

  const text = message.toLowerCase();
  // Count how many of the given markers occur as whole words/phrases in the text.
  // Markers are plain lowercase literals with no regex metacharacters.
  const countPresent = (markers) =>
    markers.filter((marker) => new RegExp(`\\b${marker}\\b`).test(text)).length;

  const connectives = ['because', 'therefore', 'however', 'although', 'if', 'then', 'so', 'but'];
  const revisionMarkers = ['wait', 'actually', 'i see', 'oh', 'hmm', 'let me think'];
  const priorRefs = ['earlier', 'before', 'you said', 'you mentioned', 'we discussed'];

  // Base: word count (normalized, saturating at 50 words)
  const wordCount = message.split(/\s+/).filter(Boolean).length;
  let score = Math.min(1, wordCount / 50);

  // Questions indicate inquiry
  score += (message.match(/\?/g) || []).length * 0.2;

  // Connective words suggest reasoning
  score += countPresent(connectives) * 0.15;

  // Self-revision markers
  score += countPresent(revisionMarkers) * 0.25;

  // References to prior content
  score += countPresent(priorRefs) * 0.2;

  return score;
}
|
|
211
|
+
|
|
212
|
+
/**
 * Measure how much a suggestion shifts from the previous one.
 *
 * Four factors are summed and divided by the factor count (4):
 *   - type or action differs          -> +1
 *   - actionTarget differs            -> +0.5
 *   - message word-set dissimilarity  -> + (1 - Jaccard similarity)
 *   - title differs                   -> +0.3
 * The maximum attainable value is therefore (1 + 0.5 + 1 + 0.3) / 4 = 0.7.
 *
 * Messages are tokenised on whitespace with empty tokens discarded, so
 * leading/trailing whitespace does not create a shared '' token that would
 * inflate the similarity of otherwise unrelated messages. Two empty messages
 * contribute no message shift; an empty vs. non-empty message contributes
 * the full 1.
 *
 * @param {Object} prev - Previous suggestion
 * @param {Object} curr - Current suggestion
 * @returns {number} Shift magnitude (0 to 0.7)
 */
function measureSuggestionShift(prev, curr) {
  const FACTOR_COUNT = 4;
  const toWordSet = (text) =>
    new Set((text || '').toLowerCase().split(/\s+/).filter(Boolean));

  let shift = 0;

  // Type/action change
  if (prev.type !== curr.type || prev.action !== curr.action) {
    shift += 1;
  }

  // Action target change
  if (prev.actionTarget !== curr.actionTarget) {
    shift += 0.5;
  }

  // Message content dissimilarity (1 - Jaccard similarity of the word sets)
  const prevWords = toWordSet(prev.message);
  const currWords = toWordSet(curr.message);
  const unionSize = new Set([...prevWords, ...currWords]).size;
  if (unionSize > 0) {
    const sharedCount = [...prevWords].filter((word) => currWords.has(word)).length;
    shift += 1 - sharedCount / unionSize;
  }

  // Title change
  if (prev.title !== curr.title) {
    shift += 0.3;
  }

  return shift / FACTOR_COUNT;
}
|
|
256
|
+
|
|
257
|
+
/**
 * Analyze how the tutor's framing evolves across turns.
 * Tracks movement between directive, exploratory, collaborative and neutral modes.
 *
 * @param {Array} turnResults - Array of turn result objects
 * @returns {Object} Framing evolution analysis:
 *   - timeline: one { turnIndex, framing, confidence } entry per turn
 *   - dominantShift: "<first> → <last>" when the first and last framing
 *     types differ, otherwise null
 *   - framingDiversity: Shannon entropy of the framing distribution divided
 *     by 2 (log2 of the four categories), i.e. a value in 0-1
 *   - framingCounts: number of turns per framing type
 *   The same four keys are returned for empty input, with zeroed counts.
 */
export function analyzeFramingShift(turnResults) {
  const framingCounts = { directive: 0, exploratory: 0, collaborative: 0, neutral: 0 };

  if (!turnResults || turnResults.length === 0) {
    // Include framingCounts so callers see the same shape as the non-empty result.
    return { timeline: [], dominantShift: null, framingDiversity: 0, framingCounts };
  }

  const timeline = [];
  for (const turn of turnResults) {
    const framing = classifyFraming(turn.suggestion?.message || '');
    timeline.push({
      turnIndex: turn.turnIndex,
      framing,
      confidence: framing.confidence,
    });
    framingCounts[framing.type]++;
  }

  // Determine dominant shift pattern (first turn vs. last turn)
  let dominantShift = null;
  if (timeline.length >= 2) {
    const firstFraming = timeline[0].framing.type;
    const lastFraming = timeline[timeline.length - 1].framing.type;
    if (firstFraming !== lastFraming) {
      dominantShift = `${firstFraming} → ${lastFraming}`;
    }
  }

  // Calculate framing diversity (entropy-like measure)
  const total = Object.values(framingCounts).reduce((a, b) => a + b, 0);
  let diversity = 0;
  if (total > 0) {
    for (const count of Object.values(framingCounts)) {
      if (count > 0) {
        const p = count / total;
        diversity -= p * Math.log2(p);
      }
    }
    // Normalize to 0-1 (max entropy is log2(4) = 2)
    diversity = diversity / 2;
  }

  return {
    timeline,
    dominantShift,
    framingDiversity: diversity,
    framingCounts,
  };
}
|
|
315
|
+
|
|
316
|
+
/**
 * Classify the framing style of a tutor message by counting marker phrases.
 *
 * The message is lowercased and each marker phrase found as a substring adds
 * one point to its category. The category with the most points wins; ties go
 * to the earlier category in the order directive, exploratory, collaborative.
 * Confidence is the winner's share of all marker hits. A message with no
 * hits is 'neutral' with confidence 0.5; an empty or non-string message is
 * 'neutral' with confidence 0.
 *
 * All marker phrases are lowercase because they are compared against the
 * lowercased message (a capitalised marker such as 'I wonder' could never match).
 *
 * @param {string} message - The tutor message
 * @returns {Object} Framing classification { type, confidence }
 */
function classifyFraming(message) {
  if (!message || typeof message !== 'string') {
    return { type: 'neutral', confidence: 0 };
  }

  const markersByType = {
    directive: ['you should', 'you need to', 'you must', 'the correct', 'the answer is',
      'let me explain', 'here\'s what', 'first, you', 'make sure to'],
    exploratory: ['what if', 'have you considered', 'what do you think', 'how might',
      'could it be', 'i wonder', 'let\'s explore', 'what would happen'],
    collaborative: ['together', 'let\'s', 'we could', 'building on your',
      'your insight', 'you\'ve helped me', 'our conversation', 'co-create'],
  };

  const msg = message.toLowerCase();
  const scores = { directive: 0, exploratory: 0, collaborative: 0, neutral: 0 };
  for (const [type, markers] of Object.entries(markersByType)) {
    scores[type] = markers.filter((marker) => msg.includes(marker)).length;
  }

  // Find dominant framing
  const maxScore = Math.max(...Object.values(scores));
  if (maxScore === 0) {
    return { type: 'neutral', confidence: 0.5 };
  }

  // find() returns the first category reaching the maximum, which fixes the tie order.
  const [dominant] = Object.entries(scores).find(([, value]) => value === maxScore);
  const totalMarkers = Object.values(scores).reduce((a, b) => a + b, 0);

  return { type: dominant, confidence: maxScore / totalMarkers };
}
|
|
363
|
+
|
|
364
|
+
/**
 * Estimate how strongly dimension scores settle down as the dialogue goes on.
 *
 * For every dimension with at least three non-null scores, the scores are
 * split at the midpoint and the variance of the later part is compared with
 * the variance of the earlier part. The fractional variance reduction is
 * clamped to [0, 1] and averaged over the measured dimensions. A dimension
 * whose earlier part has zero variance is not measured (with exactly three
 * scores the earlier part is a single value, so that dimension is skipped).
 *
 * @param {Object} trajectories - Dimension trajectories from analyzeTurnProgression
 * @returns {number|null} Mean variance reduction (0-1), or null when
 *   trajectories is missing or no dimension could be measured
 */
function calculateConvergence(trajectories) {
  if (!trajectories) return null;

  const reductions = [];

  for (const series of Object.values(trajectories)) {
    const observed = series.filter((value) => value !== null);
    if (observed.length < 3) continue;

    const splitAt = Math.floor(observed.length / 2);
    const earlyVariance = calculateVariance(observed.slice(0, splitAt));
    const lateVariance = calculateVariance(observed.slice(splitAt));

    // No spread at the start leaves nothing to reduce, so skip the dimension.
    if (!(earlyVariance > 0)) continue;

    const reduction = (earlyVariance - lateVariance) / earlyVariance;
    reductions.push(Math.max(0, Math.min(1, reduction)));
  }

  if (reductions.length === 0) return null;

  const reductionSum = reductions.reduce((sum, value) => sum + value, 0);
  return reductionSum / reductions.length;
}
|
|
399
|
+
|
|
400
|
+
/**
 * Population variance (mean squared deviation from the mean) of a list of numbers.
 *
 * @param {Array<number>} values - Array of numbers
 * @returns {number} Variance, or 0 for a missing or empty array
 */
function calculateVariance(values) {
  if (!values || values.length === 0) return 0;

  const count = values.length;

  let sum = 0;
  for (const value of values) {
    sum += value;
  }
  const mean = sum / count;

  let squaredDeviationSum = 0;
  for (const value of values) {
    squaredDeviationSum += (value - mean) ** 2;
  }

  return squaredDeviationSum / count;
}
|
|
412
|
+
|
|
413
|
+
/**
 * Tally "evolving" and "static" marker phrases for tutor and learner over a
 * whole dialogue and derive transformation ratios from the tallies.
 *
 * Matching is a case-insensitive substring test. The tutor text is the
 * suggestion message; the learner text is `learnerMessage`, falling back to
 * `action_details.message`. Each marker counts at most once per turn.
 *
 * @param {Array} turnResults - Array of turn result objects
 * @param {Object} markerDefinitions - Marker definitions from scenario
 *   (tutorEvolving, tutorStatic, learnerEvolving, learnerStatic; each
 *   defaults to an empty list when undefined)
 * @returns {Object} Bilateral transformation analysis: the four counts, the
 *   evolving share for tutor and learner (null when that side has no hits),
 *   and bilateralBalance = smaller ratio / larger ratio (null when either
 *   ratio is null or both are 0)
 */
export function analyzeTransformationMarkers(turnResults, markerDefinitions) {
  if (!turnResults || !markerDefinitions) {
    return {
      tutorEvolvingCount: 0,
      tutorStaticCount: 0,
      learnerEvolvingCount: 0,
      learnerStaticCount: 0,
      tutorTransformationRatio: null,
      learnerGrowthRatio: null,
      bilateralBalance: null,
    };
  }

  const { tutorEvolving = [], tutorStatic = [], learnerEvolving = [], learnerStatic = [] } = markerDefinitions;

  // Number of markers from the list that occur in the (already lowercased) text.
  const countHits = (text, markers) => {
    let hits = 0;
    for (const marker of markers) {
      if (text.includes(marker.toLowerCase())) hits += 1;
    }
    return hits;
  };

  const tally = { tutorEvolving: 0, tutorStatic: 0, learnerEvolving: 0, learnerStatic: 0 };

  for (const turn of turnResults) {
    const tutorText = (turn.suggestion?.message || '').toLowerCase();
    tally.tutorEvolving += countHits(tutorText, tutorEvolving);
    tally.tutorStatic += countHits(tutorText, tutorStatic);

    const learnerText = (turn.learnerMessage || turn.action_details?.message || '').toLowerCase();
    tally.learnerEvolving += countHits(learnerText, learnerEvolving);
    tally.learnerStatic += countHits(learnerText, learnerStatic);
  }

  // Share of evolving hits among all hits for one side; null when there were none.
  const evolvingShare = (evolving, fixed) => {
    const all = evolving + fixed;
    return all > 0 ? evolving / all : null;
  };

  const tutorTransformationRatio = evolvingShare(tally.tutorEvolving, tally.tutorStatic);
  const learnerGrowthRatio = evolvingShare(tally.learnerEvolving, tally.learnerStatic);

  // Bilateral balance: 1.0 = perfectly balanced, 0.0 = completely asymmetric.
  let bilateralBalance = null;
  if (tutorTransformationRatio !== null && learnerGrowthRatio !== null) {
    const larger = Math.max(tutorTransformationRatio, learnerGrowthRatio);
    const smaller = Math.min(tutorTransformationRatio, learnerGrowthRatio);
    if (larger > 0) {
      bilateralBalance = smaller / larger;
    }
  }

  return {
    tutorEvolvingCount: tally.tutorEvolving,
    tutorStaticCount: tally.tutorStatic,
    learnerEvolvingCount: tally.learnerEvolving,
    learnerStaticCount: tally.learnerStatic,
    tutorTransformationRatio,
    learnerGrowthRatio,
    bilateralBalance,
  };
}
|
|
487
|
+
|
|
488
|
+
// Default export: the module's exported analyzers bundled into one object,
// for callers that import the module as a single value.
export default {
  analyzeTurnProgression,
  calculateAdaptationIndex,
  calculateLearnerGrowthIndex,
  analyzeFramingShift,
  analyzeTransformationMarkers,
};
|