@machinespirits/eval 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/LICENSE +21 -0
  2. package/README.md +161 -0
  3. package/config/eval-settings.yaml +18 -0
  4. package/config/evaluation-rubric-learner.yaml +277 -0
  5. package/config/evaluation-rubric.yaml +613 -0
  6. package/config/interaction-eval-scenarios.yaml +93 -50
  7. package/config/learner-agents.yaml +124 -193
  8. package/config/machinespirits-eval.code-workspace +11 -0
  9. package/config/providers.yaml +60 -0
  10. package/config/suggestion-scenarios.yaml +1399 -0
  11. package/config/tutor-agents.yaml +716 -0
  12. package/docs/EVALUATION-VARIABLES.md +589 -0
  13. package/docs/REPLICATION-PLAN.md +577 -0
  14. package/index.js +15 -6
  15. package/package.json +16 -22
  16. package/routes/evalRoutes.js +88 -36
  17. package/scripts/analyze-judge-reliability.js +401 -0
  18. package/scripts/analyze-run.js +97 -0
  19. package/scripts/analyze-run.mjs +282 -0
  20. package/scripts/analyze-validation-failures.js +141 -0
  21. package/scripts/check-run.mjs +17 -0
  22. package/scripts/code-impasse-strategies.js +1132 -0
  23. package/scripts/compare-runs.js +44 -0
  24. package/scripts/compare-suggestions.js +80 -0
  25. package/scripts/compare-transformation.js +116 -0
  26. package/scripts/dig-into-run.js +158 -0
  27. package/scripts/eval-cli.js +2626 -0
  28. package/scripts/generate-paper-figures.py +452 -0
  29. package/scripts/qualitative-analysis-ai.js +1313 -0
  30. package/scripts/qualitative-analysis.js +688 -0
  31. package/scripts/seed-db.js +87 -0
  32. package/scripts/show-failed-suggestions.js +64 -0
  33. package/scripts/validate-content.js +192 -0
  34. package/server.js +3 -2
  35. package/services/__tests__/evalConfigLoader.test.js +338 -0
  36. package/services/anovaStats.js +499 -0
  37. package/services/contentResolver.js +407 -0
  38. package/services/dialogueTraceAnalyzer.js +454 -0
  39. package/services/evalConfigLoader.js +625 -0
  40. package/services/evaluationRunner.js +2171 -270
  41. package/services/evaluationStore.js +564 -29
  42. package/services/learnerConfigLoader.js +75 -5
  43. package/services/learnerRubricEvaluator.js +284 -0
  44. package/services/learnerTutorInteractionEngine.js +375 -0
  45. package/services/processUtils.js +18 -0
  46. package/services/progressLogger.js +98 -0
  47. package/services/promptRecommendationService.js +31 -26
  48. package/services/promptRewriter.js +427 -0
  49. package/services/rubricEvaluator.js +543 -70
  50. package/services/streamingReporter.js +104 -0
  51. package/services/turnComparisonAnalyzer.js +494 -0
  52. package/components/MobileEvalDashboard.tsx +0 -267
  53. package/components/comparison/DeltaAnalysisTable.tsx +0 -137
  54. package/components/comparison/ProfileComparisonCard.tsx +0 -176
  55. package/components/comparison/RecognitionABMode.tsx +0 -385
  56. package/components/comparison/RecognitionMetricsPanel.tsx +0 -135
  57. package/components/comparison/WinnerIndicator.tsx +0 -64
  58. package/components/comparison/index.ts +0 -5
  59. package/components/mobile/BottomSheet.tsx +0 -233
  60. package/components/mobile/DimensionBreakdown.tsx +0 -210
  61. package/components/mobile/DocsView.tsx +0 -363
  62. package/components/mobile/LogsView.tsx +0 -481
  63. package/components/mobile/PsychodynamicQuadrant.tsx +0 -261
  64. package/components/mobile/QuickTestView.tsx +0 -1098
  65. package/components/mobile/RecognitionTypeChart.tsx +0 -124
  66. package/components/mobile/RecognitionView.tsx +0 -809
  67. package/components/mobile/RunDetailView.tsx +0 -261
  68. package/components/mobile/RunHistoryView.tsx +0 -367
  69. package/components/mobile/ScoreRadial.tsx +0 -211
  70. package/components/mobile/StreamingLogPanel.tsx +0 -230
  71. package/components/mobile/SynthesisStrategyChart.tsx +0 -140
  72. package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +0 -52
  73. package/docs/research/ABLATION-MODEL-SELECTION.md +0 -53
  74. package/docs/research/ADVANCED-EVAL-ANALYSIS.md +0 -60
  75. package/docs/research/ANOVA-RESULTS-2026-01-14.md +0 -257
  76. package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +0 -586
  77. package/docs/research/COST-ANALYSIS.md +0 -56
  78. package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +0 -340
  79. package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +0 -291
  80. package/docs/research/EVAL-SYSTEM-ANALYSIS.md +0 -306
  81. package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +0 -301
  82. package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +0 -1988
  83. package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +0 -282
  84. package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +0 -147
  85. package/docs/research/PAPER-EXTENSION-DYADIC.md +0 -204
  86. package/docs/research/PAPER-UNIFIED.md +0 -659
  87. package/docs/research/PAPER-UNIFIED.pdf +0 -0
  88. package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +0 -356
  89. package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +0 -419
  90. package/docs/research/apa.csl +0 -2133
  91. package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +0 -1637
  92. package/docs/research/archive/paper-multiagent-tutor.tex +0 -978
  93. package/docs/research/paper-draft/full-paper.md +0 -136
  94. package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
  95. package/docs/research/paper-draft/references.bib +0 -515
  96. package/docs/research/transcript-baseline.md +0 -139
  97. package/docs/research/transcript-recognition-multiagent.md +0 -187
  98. package/hooks/useEvalData.ts +0 -625
  99. package/server-init.js +0 -45
  100. package/services/benchmarkService.js +0 -1892
  101. package/types.ts +0 -165
  102. package/utils/haptics.ts +0 -45
@@ -7,10 +7,19 @@
7
7
  * Uses shared configLoaderBase.js for common loading patterns.
8
8
  */
9
9
 
10
+ import fs from 'fs';
11
+ import path from 'path';
12
+ import { fileURLToPath } from 'url';
13
+ import yaml from 'yaml';
10
14
  import { configLoaderBase, modelResolver } from '@machinespirits/tutor-core';
11
15
  const { loadProviders, createConfigLoader, createPromptLoader } = configLoaderBase;
12
16
  const { createBoundResolver } = modelResolver;
13
17
 
18
// Local eval-repo config directory (for learner-agents.yaml override).
// ESM has no __dirname; derive it from import.meta.url, then resolve ../config.
const __filename_local = fileURLToPath(import.meta.url);
const __dirname_local = path.dirname(__filename_local);
const LOCAL_CONFIG_DIR = path.join(path.resolve(__dirname_local, '..'), 'config');
22
+
14
23
  // ============================================================================
15
24
  // Default Configurations
16
25
  // ============================================================================
@@ -95,6 +104,8 @@ function getDefaultPrompt(filename) {
95
104
 
96
105
  const defaults = {
97
106
  'unified': `You are simulating a learner's internal experience. Respond authentically to the tutor's message, showing genuine reactions including confusion, insight, frustration, or understanding.`,
107
+ 'ego': `You represent the EGO dimension of the learner. Draft an authentic learner response based on the conversation so far — express what the learner would naturally say, including confusion, partial understanding, questions, and emotional reactions.`,
108
+ 'superego': `You represent the SUPEREGO dimension of the learner. Critique the ego's draft response: Is it realistic for this learner's level? Does it engage meaningfully with the tutor's message? Should the learner push back, ask for clarification, or show more/less understanding?`,
98
109
  'desire': `You represent the DESIRE dimension of a learner. Express immediate wants, frustrations, and emotional reactions.`,
99
110
  'intellect': `You represent the INTELLECT dimension of a learner. Process information rationally, identify what makes sense and what doesn't.`,
100
111
  'aspiration': `You represent the ASPIRATION dimension of a learner. Express goals, standards, and desire for mastery.`,
@@ -114,12 +125,55 @@ function getDefaultPrompt(filename) {
114
125
  // Create Base Loaders
115
126
  // ============================================================================
116
127
 
117
- const configLoader = createConfigLoader('learner-agents.yaml', getDefaultConfig);
128
// Load from eval repo's local config/ directory first, fall back to tutor-core's createConfigLoader.
// mtime-based cache: the YAML is re-read only when the file changes on disk.
let localConfigCache = null;
let localConfigMtime = null;

/**
 * Load learner-agents.yaml from the eval repo's own config/ directory.
 *
 * @param {boolean} forceReload - Bypass the mtime cache and re-read from disk
 * @returns {Object|null} Parsed config with shared providers merged in, or
 *   null when the local file is missing/unreadable so callers can fall back
 *   to tutor-core's loader.
 */
function loadLocalConfig(forceReload = false) {
  const localPath = path.join(LOCAL_CONFIG_DIR, 'learner-agents.yaml');
  try {
    const stats = fs.statSync(localPath);
    if (!forceReload && localConfigCache && localConfigMtime === stats.mtimeMs) {
      return localConfigCache;
    }
    const content = fs.readFileSync(localPath, 'utf-8');
    localConfigCache = yaml.parse(content);
    localConfigMtime = stats.mtimeMs;

    // Merge shared providers (providers.yaml).
    // NOTE(review): sharedProviders is spread last, so shared entries override
    // same-named providers defined in the local file — confirm this precedence
    // is intended.
    const sharedProviders = loadProviders(forceReload);
    if (sharedProviders) {
      localConfigCache.providers = { ...localConfigCache.providers, ...sharedProviders };
    }

    return localConfigCache;
  } catch {
    // Fall through to tutor-core's loader / defaults.
    // (This also swallows YAML parse errors, so the caller cannot distinguish
    // "file missing" from "file invalid".)
    return null;
  }
}
155
+
156
const coreConfigLoader = createConfigLoader('learner-agents.yaml', getDefaultConfig);
const promptLoader = createPromptLoader(getDefaultPrompt);

// loadConfig: prefer local eval-repo config, fall back to tutor-core / defaults
/**
 * Load the learner-agents configuration.
 *
 * @param {boolean} forceReload - Re-read from disk, bypassing caches
 * @returns {Object} Config from the local eval repo when present, otherwise
 *   the result of tutor-core's loader (which supplies built-in defaults)
 */
export function loadConfig(forceReload = false) {
  return loadLocalConfig(forceReload) || coreConfigLoader.loadConfig(forceReload);
}
163
+
164
/**
 * Resolve a provider's configuration from the locally-loaded config's
 * providers map, falling back to tutor-core's resolver for unknown providers.
 *
 * @param {string} providerName - Provider key (e.g. 'openai', 'local')
 * @returns {Object} Provider config extended with `apiKey` and `isConfigured`
 */
export function getProviderConfig(providerName) {
  const providers = loadConfig().providers;
  const provider = providers?.[providerName];
  if (!provider) {
    // Unknown locally — defer to tutor-core's resolver.
    return coreConfigLoader.getProviderConfig(providerName);
  }

  let apiKey = '';
  if (provider.api_key_env) {
    apiKey = process.env[provider.api_key_env] || '';
  }

  // The 'local' provider is configured by base_url; all others require a key.
  const isConfigured =
    providerName === 'local' ? Boolean(provider.base_url) : Boolean(apiKey);

  return { ...provider, apiKey, isConfigured };
}
123
177
 
124
178
  // Re-export loadProviders from base
125
179
  export { loadProviders };
@@ -157,7 +211,7 @@ export function getActiveProfile(profileName = null) {
157
211
 
158
212
  /**
159
213
  * Get architecture configuration
160
- * @param {string} architectureName - Architecture name (unified, psychodynamic, dialectical, cognitive)
214
+ * @param {string} architectureName - Architecture name (unified, ego_superego)
161
215
  * @returns {Object} Architecture configuration with agents
162
216
  */
163
217
  export function getArchitecture(architectureName) {
@@ -364,6 +418,21 @@ export function getEvaluationConfig() {
364
418
  */
365
419
  export const resolveModel = createBoundResolver(getProviderConfig);
366
420
 
421
/**
 * Get YAML-level model overrides from learner-agents.yaml.
 * These are lower priority than CLI flags.
 *
 * @returns {Object} { modelOverride, egoModelOverride, superegoModelOverride } (null if not set)
 */
export function getLearnerModelOverrides() {
  const config = loadConfig();
  // Normalise any falsy/missing value to null so callers get a uniform shape.
  const pick = (key) => config?.[key] || null;
  return {
    modelOverride: pick('model_override'),
    egoModelOverride: pick('ego_model_override'),
    superegoModelOverride: pick('superego_model_override'),
  };
}
435
+
367
436
  export default {
368
437
  loadConfig,
369
438
  loadProviders,
@@ -382,4 +451,5 @@ export default {
382
451
  listArchitectures,
383
452
  getLoggingConfig,
384
453
  getEvaluationConfig,
454
+ getLearnerModelOverrides,
385
455
  };
@@ -0,0 +1,284 @@
1
+ /**
2
+ * Learner Rubric Evaluator Service
3
+ *
4
+ * Builds evaluation prompts for scoring learner turns in multi-turn dialogues
5
+ * using the learner-side rubric (config/evaluation-rubric-learner.yaml).
6
+ *
7
+ * Key design decisions:
8
+ * - Truncates transcript at the learner's turn to prevent retrospective bias
9
+ * - Includes internal deliberation traces for multi-agent learners
10
+ * - Omits deliberation_depth dimension for single-agent (unified) learners
11
+ */
12
+
13
+ import fs from 'fs';
14
+ import path from 'path';
15
+ import { fileURLToPath } from 'url';
16
+ import yaml from 'yaml';
17
+
18
// ESM modules have no __dirname; derive it from import.meta.url.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Config and prompt locations, resolved relative to this services/ directory.
const EVAL_CONFIG_DIR = path.resolve(__dirname, '..', 'config');
const PROMPTS_DIR = path.resolve(__dirname, '..', 'prompts'); // NOTE(review): unused in this module — confirm before removing

// mtime-based cache for the parsed learner rubric (see loadLearnerRubric)
let rubricCache = null;
let rubricMtime = null;
24
+
25
/**
 * Load the learner rubric YAML with mtime-based caching.
 *
 * @param {Object} [options]
 * @param {boolean} [options.forceReload] - Bypass the cache and re-read from disk
 * @returns {Object|null} Parsed rubric, or null when the rubric file is missing
 */
export function loadLearnerRubric({ forceReload } = {}) {
  const rubricPath = path.join(EVAL_CONFIG_DIR, 'evaluation-rubric-learner.yaml');

  let stats;
  try {
    stats = fs.statSync(rubricPath);
  } catch (err) {
    console.warn('[learnerRubricEvaluator] Learner rubric file not found:', err.message);
    return null;
  }

  if (!forceReload && rubricCache && rubricMtime === stats.mtimeMs) {
    return rubricCache;
  }

  const raw = fs.readFileSync(rubricPath, 'utf-8');
  rubricCache = yaml.parse(raw);
  // Record the mtime only AFTER a successful parse. The previous version
  // updated rubricMtime before reading, so a yaml.parse failure left the old
  // cache marked as fresh and subsequent calls silently returned stale data.
  rubricMtime = stats.mtimeMs;
  return rubricCache;
}
46
+
47
/**
 * Get learner rubric dimensions, optionally excluding deliberation_depth
 * for single-agent learners.
 *
 * @param {Object} options
 * @param {boolean} options.isMultiAgent - Whether the learner uses ego/superego architecture
 * @returns {Object} Map of dimension key → dimension config
 */
export function getLearnerDimensions({ isMultiAgent = false } = {}) {
  const rubric = loadLearnerRubric();
  const allDims = rubric?.dimensions;
  if (!allDims) return {};

  // Multi-agent learners score every dimension; single-agent learners skip
  // deliberation_depth (they have no internal deliberation to evaluate).
  if (isMultiAgent) {
    return { ...allDims };
  }
  const { deliberation_depth, ...singleAgentDims } = allDims;
  return singleAgentDims;
}
67
+
68
/**
 * Calculate the overall learner score from per-dimension scores.
 *
 * @param {Object} scores - Map of dimension → { score, reasoning }
 * @param {boolean} isMultiAgent - Whether deliberation_depth is included
 * @returns {number} Overall score on 0-100 scale
 */
export function calculateLearnerOverallScore(scores, isMultiAgent = false) {
  const dims = getLearnerDimensions({ isMultiAgent });

  let totalWeight = 0;
  let weightedSum = 0;

  for (const key of Object.keys(dims)) {
    const entry = scores[key];
    if (!entry) continue;

    // Accept either a bare number or a { score, reasoning } object.
    const value = typeof entry === 'object' ? entry.score : entry;
    // Skip anything outside the rubric's 1-5 range.
    if (typeof value !== 'number' || value < 1 || value > 5) continue;

    const { weight } = dims[key];
    weightedSum += value * weight;
    totalWeight += weight;
  }

  if (totalWeight === 0) return 0;

  // Map the weighted 1-5 average onto a 0-100 scale.
  return ((weightedSum / totalWeight - 1) / 4) * 100;
}
97
+
98
/**
 * Build the dimension criteria section for the judge prompt.
 *
 * @param {Object} dimensions - Rubric dimensions to include
 * @returns {string} Formatted criteria text
 */
function buildDimensionCriteria(dimensions) {
  const sections = [];
  for (const [key, dim] of Object.entries(dimensions)) {
    const weightPct = (dim.weight * 100).toFixed(0);
    const criteriaLines = Object.entries(dim.criteria || {}).map(
      ([score, desc]) => `  ${score}: ${desc}`
    );
    sections.push(
      `**${dim.name}** (weight: ${weightPct}%, key: ${key})\n` +
      `${dim.description}\n` +
      `Criteria:\n` +
      criteriaLines.join('\n')
    );
  }
  return sections.join('\n\n');
}
115
+
116
/**
 * Build a truncated transcript up to and including the learner turn being evaluated.
 * Does NOT include subsequent tutor responses to prevent retrospective bias.
 *
 * @param {Array} turns - All turns from the interaction
 * @param {number} targetTurnIndex - Index (in the turns array) of the learner turn to evaluate
 * @returns {string} Formatted transcript
 */
function buildTruncatedTranscript(turns, targetTurnIndex) {
  const entries = turns.slice(0, targetTurnIndex + 1).map((turn) => {
    // Anything that is not a learner turn is attributed to the tutor.
    const speaker = turn.phase === 'learner' ? 'LEARNER' : 'TUTOR';
    const body = turn.externalMessage || '(no message)';
    return `[Turn ${turn.turnNumber}, ${speaker}]\n${body}\n`;
  });
  return entries.join('\n');
}
139
+
140
/**
 * Format internal deliberation trace for display in the judge prompt.
 *
 * @param {Array} deliberation - Array of { role, content } objects
 * @returns {string} Formatted deliberation trace
 */
function formatDeliberation(deliberation) {
  if (!deliberation || deliberation.length === 0) return '';

  // Human-readable labels for known deliberation roles; unknown roles
  // fall back to the raw role string.
  const roleLabels = {
    'ego_initial': 'Ego (initial reaction)',
    'superego': 'Superego (critique)',
    'ego_revision': 'Ego (revision — final authority)',
    'synthesis': 'Synthesis (unified process)',
    'ego': 'Ego',
  };

  const parts = [];
  for (const step of deliberation) {
    const label = roleLabels[step.role] || step.role;
    parts.push(`**${label}**:\n${step.content}`);
  }
  return parts.join('\n\n');
}
161
+
162
/**
 * Build a complete learner evaluation prompt for a single learner turn.
 *
 * The prompt shows the judge only the dialogue up to (and including) the
 * target turn, the learner's persona context, and the rubric dimensions
 * applicable to the learner's architecture.
 *
 * @param {Object} params
 * @param {Array} params.turns - All turns from the interaction
 * @param {number} params.targetTurnIndex - Index of the learner turn to evaluate
 * @param {string} params.personaId - Learner persona ID
 * @param {string} params.personaDescription - Description of the learner persona
 * @param {string} params.learnerArchitecture - 'unified' or 'multi_agent'
 * @param {string} params.scenarioName - Name of the scenario
 * @param {string} params.topic - Topic being discussed
 * @returns {string} Complete judge prompt
 */
export function buildLearnerEvaluationPrompt(params) {
  const {
    turns,
    targetTurnIndex,
    personaId = 'unknown',
    personaDescription = 'No persona description available',
    learnerArchitecture = 'unified',
    scenarioName = 'unknown',
    topic = 'unknown',
  } = params;

  // 'psychodynamic' is accepted as an alias for the multi-agent architecture
  const isMultiAgent = learnerArchitecture === 'multi_agent' || learnerArchitecture === 'psychodynamic';
  const dimensions = getLearnerDimensions({ isMultiAgent });
  const dimensionCriteria = buildDimensionCriteria(dimensions);

  const targetTurn = turns[targetTurnIndex];
  const truncatedTranscript = buildTruncatedTranscript(turns, targetTurnIndex);

  // Internal deliberation section (multi-agent only)
  let internalDeliberationSection = '';
  if (isMultiAgent && targetTurn.internalDeliberation?.length > 0) {
    internalDeliberationSection = `
**Internal deliberation** (the learner's ego/superego process — not visible to the tutor):

${formatDeliberation(targetTurn.internalDeliberation)}
`;
  }

  // Note about deliberation_depth dimension
  let deliberationDepthNote = '';
  if (isMultiAgent) {
    deliberationDepthNote = 'This is a multi-agent learner. Score ALL dimensions including deliberation_depth (evaluate the quality of the internal ego/superego process shown above).';
  } else {
    deliberationDepthNote = 'This is a single-agent (unified) learner. OMIT the deliberation_depth dimension — do not include it in your scores.';
  }

  // Build dimension keys for JSON example
  const dimKeys = Object.keys(dimensions);
  const exampleScores = dimKeys.map(key => {
    return `  "${key}": {"score": 3, "reasoning": "Brief reason"}`;
  }).join(',\n');

  return `You are an expert evaluator of synthetic learner agents in AI tutoring dialogues. Your task is to evaluate the quality of a LEARNER's response turn — how well the learner agent engages as a student, independent of the tutor's quality.

You are NOT evaluating the tutor. You are evaluating whether the learner agent produces responses that reflect genuine learning engagement: authentic reactions, substantive questions, conceptual thinking, and evidence of intellectual development.

## IMPORTANT: BIAS PREVENTION

You are shown the dialogue history UP TO AND INCLUDING the learner turn being evaluated. You do NOT see subsequent tutor responses. Evaluate the learner turn on its own merits.

## EVALUATION RUBRIC

Score each dimension from 1-5:
- 1: Completely fails this criterion
- 2: Weak, significant issues
- 3: Adequate, meets basic expectations
- 4: Good, exceeds expectations
- 5: Excellent, exemplary

${dimensionCriteria}

## LEARNER CONTEXT

**Assigned Persona**: ${personaId}
**Persona Description**: ${personaDescription}
**Learner Architecture**: ${learnerArchitecture}
**Scenario**: ${scenarioName}
**Topic**: ${topic}

## DIALOGUE HISTORY (up to and including the turn being evaluated)

${truncatedTranscript}

## LEARNER TURN TO EVALUATE

**External message** (what the tutor sees):
${targetTurn.externalMessage || '(no message)'}
${internalDeliberationSection}
## YOUR TASK

${deliberationDepthNote}

Evaluate the learner's turn and provide:
1. A score (1-5) for each applicable dimension with brief reasoning
2. An overall score (weighted average, 0-100 scale)

CRITICAL JSON RULES:
- Never use unescaped double quotes inside JSON string values. Use single quotes or rephrase.
- Keep "reasoning" values under 25 words.
- BAD: "reasoning": "Says \\"great point\\" which sounds scripted"
- GOOD: "reasoning": "Says 'great point' which sounds scripted"

Respond with ONLY a JSON object in this exact format (no other text before or after):
\`\`\`json
{
  "scores": {
${exampleScores}
  },
  "overall_score": 55,
  "summary": "Brief overall assessment of learner turn quality"
}
\`\`\``;
}
278
+
279
// Default export bundling this module's public API (mirrors the named exports).
export default {
  loadLearnerRubric,
  getLearnerDimensions,
  calculateLearnerOverallScore,
  buildLearnerEvaluationPrompt,
};