@machinespirits/eval 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/components/MobileEvalDashboard.tsx +267 -0
  2. package/components/comparison/DeltaAnalysisTable.tsx +137 -0
  3. package/components/comparison/ProfileComparisonCard.tsx +176 -0
  4. package/components/comparison/RecognitionABMode.tsx +385 -0
  5. package/components/comparison/RecognitionMetricsPanel.tsx +135 -0
  6. package/components/comparison/WinnerIndicator.tsx +64 -0
  7. package/components/comparison/index.ts +5 -0
  8. package/components/mobile/BottomSheet.tsx +233 -0
  9. package/components/mobile/DimensionBreakdown.tsx +210 -0
  10. package/components/mobile/DocsView.tsx +363 -0
  11. package/components/mobile/LogsView.tsx +481 -0
  12. package/components/mobile/PsychodynamicQuadrant.tsx +261 -0
  13. package/components/mobile/QuickTestView.tsx +1098 -0
  14. package/components/mobile/RecognitionTypeChart.tsx +124 -0
  15. package/components/mobile/RecognitionView.tsx +809 -0
  16. package/components/mobile/RunDetailView.tsx +261 -0
  17. package/components/mobile/RunHistoryView.tsx +367 -0
  18. package/components/mobile/ScoreRadial.tsx +211 -0
  19. package/components/mobile/StreamingLogPanel.tsx +230 -0
  20. package/components/mobile/SynthesisStrategyChart.tsx +140 -0
  21. package/config/interaction-eval-scenarios.yaml +832 -0
  22. package/config/learner-agents.yaml +248 -0
  23. package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +52 -0
  24. package/docs/research/ABLATION-MODEL-SELECTION.md +53 -0
  25. package/docs/research/ADVANCED-EVAL-ANALYSIS.md +60 -0
  26. package/docs/research/ANOVA-RESULTS-2026-01-14.md +257 -0
  27. package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +586 -0
  28. package/docs/research/COST-ANALYSIS.md +56 -0
  29. package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +340 -0
  30. package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +291 -0
  31. package/docs/research/EVAL-SYSTEM-ANALYSIS.md +306 -0
  32. package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +301 -0
  33. package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +1988 -0
  34. package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +282 -0
  35. package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +147 -0
  36. package/docs/research/PAPER-EXTENSION-DYADIC.md +204 -0
  37. package/docs/research/PAPER-UNIFIED.md +659 -0
  38. package/docs/research/PAPER-UNIFIED.pdf +0 -0
  39. package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +356 -0
  40. package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +419 -0
  41. package/docs/research/apa.csl +2133 -0
  42. package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +1637 -0
  43. package/docs/research/archive/paper-multiagent-tutor.tex +978 -0
  44. package/docs/research/paper-draft/full-paper.md +136 -0
  45. package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
  46. package/docs/research/paper-draft/references.bib +515 -0
  47. package/docs/research/transcript-baseline.md +139 -0
  48. package/docs/research/transcript-recognition-multiagent.md +187 -0
  49. package/hooks/useEvalData.ts +625 -0
  50. package/index.js +27 -0
  51. package/package.json +73 -0
  52. package/routes/evalRoutes.js +3002 -0
  53. package/scripts/advanced-eval-analysis.js +351 -0
  54. package/scripts/analyze-eval-costs.js +378 -0
  55. package/scripts/analyze-eval-results.js +513 -0
  56. package/scripts/analyze-interaction-evals.js +368 -0
  57. package/server-init.js +45 -0
  58. package/server.js +162 -0
  59. package/services/benchmarkService.js +1892 -0
  60. package/services/evaluationRunner.js +739 -0
  61. package/services/evaluationStore.js +1121 -0
  62. package/services/learnerConfigLoader.js +385 -0
  63. package/services/learnerTutorInteractionEngine.js +857 -0
  64. package/services/memory/learnerMemoryService.js +1227 -0
  65. package/services/memory/learnerWritingPad.js +577 -0
  66. package/services/memory/tutorWritingPad.js +674 -0
  67. package/services/promptRecommendationService.js +493 -0
  68. package/services/rubricEvaluator.js +826 -0
@@ -0,0 +1,385 @@
1
+ /**
2
+ * Learner Configuration Loader
3
+ *
4
+ * Loads and manages synthetic learner configuration from YAML files.
5
+ * Supports environment variable overrides and multiple profiles.
6
+ *
7
+ * Uses shared configLoaderBase.js for common loading patterns.
8
+ */
9
+
10
+ import { configLoaderBase, modelResolver } from '@machinespirits/tutor-core';
11
+ const { loadProviders, createConfigLoader, createPromptLoader } = configLoaderBase;
12
+ const { createBoundResolver } = modelResolver;
13
+
14
+ // ============================================================================
15
+ // Default Configurations
16
+ // ============================================================================
17
+
18
/**
 * Build the built-in provider table used as part of the default configuration.
 *
 * Only OpenRouter is defined. The `nemotron` alias and `default_model` point
 * at the same model ID. A fresh object graph is created on every call.
 *
 * @returns {Object} Provider definitions keyed by provider name
 */
function getDefaultProviders() {
  const nemotronId = 'nvidia/nemotron-3-nano-30b-a3b:free';

  // Short alias -> full model ID
  const models = {
    nemotron: nemotronId,
    sonnet: 'anthropic/claude-sonnet-4.5',
    haiku: 'anthropic/claude-haiku-4.5',
  };

  const openrouter = {
    api_key_env: 'OPENROUTER_API_KEY',
    base_url: 'https://openrouter.ai/api/v1/chat/completions',
    default_model: nemotronId,
    models,
  };

  return { openrouter };
}
32
+
33
/**
 * Assemble the complete fallback configuration from the individual default
 * factories: one provider table, one architecture, one persona and one
 * profile, with `unified` as the active profile.
 *
 * @returns {Object} Default learner configuration
 */
function getDefaultConfig() {
  const providers = getDefaultProviders();
  const unifiedArchitecture = getDefaultArchitecture();
  const eagerNovice = getDefaultPersona();
  const unifiedProfile = getDefaultProfile();

  return {
    active_profile: 'unified',
    providers,
    architectures: { unified: unifiedArchitecture },
    personas: { eager_novice: eagerNovice },
    profiles: { unified: unifiedProfile },
  };
}
48
+
49
/**
 * Build the fallback `unified` profile: a single learner agent on the
 * OpenRouter `nemotron` alias with internal dialogue switched off.
 *
 * @returns {Object} Default profile definition (new object on every call)
 */
function getDefaultProfile() {
  // No multi-round deliberation for the single-agent profile
  const dialogue = { enabled: false, max_rounds: 0 };

  return {
    description: 'Single unified learner agent',
    architecture: 'unified',
    provider: 'openrouter',
    model: 'nemotron',
    dialogue,
  };
}
61
+
62
/**
 * Build the fallback `unified` architecture, which contains exactly one
 * agent (`unified_learner`) with its prompt file and sampling settings.
 *
 * @returns {Object} Default architecture definition (new object on every call)
 */
function getDefaultArchitecture() {
  const hyperparameters = {
    temperature: 0.7,
    max_tokens: 300,
  };

  const unifiedLearner = {
    role: 'unified_learner',
    prompt_file: 'learner-unified.md',
    hyperparameters,
  };

  return {
    name: 'Unified Learner',
    description: 'Single agent representing the whole learner',
    agents: { unified_learner: unifiedLearner },
  };
}
78
+
79
/**
 * Build the fallback persona ("Eager Novice") together with its trait levels.
 *
 * @returns {Object} Default persona definition (new object on every call)
 */
function getDefaultPersona() {
  const traits = {
    frustration_threshold: 'low',
    persistence: 'medium',
    prior_knowledge: 'minimal',
    self_confidence: 'low',
  };

  return {
    name: 'Eager Novice',
    description: 'Enthusiastic but easily overwhelmed',
    default_architecture: 'unified',
    traits,
  };
}
92
+
93
/**
 * Built-in prompt text keyed by learner role (the prompt file name without
 * its `learner-` prefix and `.md` suffix). Frozen and created once, because
 * the table never changes between calls.
 */
const DEFAULT_LEARNER_PROMPTS = Object.freeze({
  'unified': `You are simulating a learner's internal experience. Respond authentically to the tutor's message, showing genuine reactions including confusion, insight, frustration, or understanding.`,
  'desire': `You represent the DESIRE dimension of a learner. Express immediate wants, frustrations, and emotional reactions.`,
  'intellect': `You represent the INTELLECT dimension of a learner. Process information rationally, identify what makes sense and what doesn't.`,
  'aspiration': `You represent the ASPIRATION dimension of a learner. Express goals, standards, and desire for mastery.`,
  'thesis': `You represent the learner's CURRENT UNDERSTANDING. State what you currently believe or understand about the topic.`,
  'antithesis': `You represent the learner's DOUBT. Challenge the current understanding, raise questions and objections.`,
  'synthesis-dialectical': `You represent the learner's EMERGING UNDERSTANDING. Integrate the thesis and antithesis into a new perspective.`,
  'synthesis': `Synthesize the internal voices into a coherent external response that the learner would actually say.`,
  'novice': `You are the NOVICE voice. Express confusion, basic questions, and unfamiliarity with the material.`,
  'practitioner': `You are the PRACTITIONER voice. Apply prior knowledge, make connections, show developing competence.`,
  'expert': `You are the EXPERT voice. Show deep understanding, make sophisticated connections, identify nuance.`,
});

/**
 * Return the built-in prompt for a learner prompt file name.
 *
 * The role is derived by stripping a leading `learner-` and a trailing `.md`.
 * Unknown roles get a generic prompt that names the missing file.
 *
 * @param {string} filename - Prompt file name, e.g. 'learner-desire.md'
 * @returns {string} Prompt text (always a string)
 */
function getDefaultPrompt(filename) {
  // Coerce first so a missing/non-string file name yields the generic prompt
  // instead of a TypeError on `.replace`.
  const name = String(filename);

  // Anchor both patterns: only a real prefix/suffix is removed, never a
  // matching fragment in the middle of the name.
  const role = name.replace(/^learner-/, '').replace(/\.md$/, '');

  // Own-property check: roles such as 'constructor' or '__proto__' must not
  // resolve to members inherited from Object.prototype.
  if (Object.prototype.hasOwnProperty.call(DEFAULT_LEARNER_PROMPTS, role)) {
    return DEFAULT_LEARNER_PROMPTS[role];
  }

  return `You are simulating part of a learner's internal experience. (Prompt file ${name} not found)`;
}
112
+
113
+ // ============================================================================
114
+ // Create Base Loaders
115
+ // ============================================================================
116
+
117
+ const configLoader = createConfigLoader('learner-agents.yaml', getDefaultConfig);
118
+ const promptLoader = createPromptLoader(getDefaultPrompt);
119
+
120
+ // Re-export loadConfig and getProviderConfig from the base loader
121
+ export const loadConfig = configLoader.loadConfig;
122
+ export const getProviderConfig = configLoader.getProviderConfig;
123
+
124
+ // Re-export loadProviders from base
125
+ export { loadProviders };
126
+
127
+ // Re-export prompt loading utilities
128
+ export const loadPrompt = promptLoader.loadPrompt;
129
+
130
+ // ============================================================================
131
+ // Learner-Specific Functions
132
+ // ============================================================================
133
+
134
/**
 * Get the active profile configuration.
 *
 * The profile name is chosen from the first non-empty value of: the
 * `profileName` argument, the LEARNER_PROFILE environment variable, the
 * LEARNER_AGENT_PROFILE environment variable, `active_profile` in the loaded
 * config, and finally 'unified'.
 *
 * If the chosen profile does not exist, a warning is logged and the
 * `unified` profile (from config, else the built-in default) is returned.
 * Both paths return the same shape: the profile's own keys plus `name`.
 *
 * @param {string} profileName - Optional profile name override
 * @returns {Object} Profile configuration including `name`
 */
export function getActiveProfile(profileName = null) {
  const config = loadConfig();

  // Check for environment variable override
  const envProfile = process.env.LEARNER_PROFILE || process.env.LEARNER_AGENT_PROFILE;
  const targetProfile = profileName || envProfile || config.active_profile || 'unified';

  const profile = config.profiles?.[targetProfile];
  if (profile) {
    return {
      name: targetProfile,
      ...profile,
    };
  }

  console.warn(`Learner profile "${targetProfile}" not found, using unified`);

  // Tag the fallback with the profile actually used, so callers reading
  // `.name` see the same shape as on the normal path.
  return {
    name: 'unified',
    ...(config.profiles?.unified || getDefaultProfile()),
  };
}
157
+
158
/**
 * Look up an architecture definition by name.
 *
 * When the name is not present in the loaded config a warning is logged and
 * the `unified` architecture is returned instead (from config if defined,
 * otherwise the built-in default).
 *
 * @param {string} architectureName - Architecture name (unified, psychodynamic, dialectical, cognitive)
 * @returns {Object} Architecture configuration with agents
 */
export function getArchitecture(architectureName) {
  const config = loadConfig();
  const requested = config.architectures?.[architectureName];

  if (requested) {
    return requested;
  }

  console.warn(`Architecture "${architectureName}" not found, using unified`);

  const configuredUnified = config.architectures?.unified;
  return configuredUnified ? configuredUnified : getDefaultArchitecture();
}
174
+
175
/**
 * Get persona configuration.
 *
 * If the persona is not defined in the loaded config, a warning is logged
 * and the `eager_novice` persona (from config, else the built-in default)
 * is returned. Both paths return the persona's own keys plus `id`.
 *
 * @param {string} personaId - Persona identifier
 * @returns {Object} Persona configuration including `id`
 */
export function getPersona(personaId) {
  const config = loadConfig();
  const persona = config.personas?.[personaId];

  if (persona) {
    return {
      id: personaId,
      ...persona,
    };
  }

  console.warn(`Persona "${personaId}" not found, using eager_novice`);

  // Tag the fallback with the persona actually used, so `.id` is present
  // on every return path.
  return {
    id: 'eager_novice',
    ...(config.personas?.eager_novice || getDefaultPersona()),
  };
}
194
+
195
/**
 * Get agent configuration for a specific role.
 * Mirrors tutorConfigLoader.getAgentConfig pattern - reads per-agent config from profile.
 *
 * The role is looked up as a key on the active profile. Profiles also carry
 * scalar metadata under keys such as `name` and `description`; those are not
 * agent entries, so any non-object value yields null.
 *
 * @param {string} role - Agent role (e.g., 'desire', 'intellect', 'unified_learner', 'synthesis')
 * @param {string} profileName - Optional profile name
 * @returns {Object|null} Complete agent configuration, or null when the
 *   profile has no object entry for the role
 */
export function getAgentConfig(role, profileName = null) {
  const profile = getActiveProfile(profileName);
  const agentConfig = profile[role];

  // Reject missing entries and scalar metadata (e.g. the profile's `name`
  // string) instead of building a config from defaults for them.
  if (!agentConfig || typeof agentConfig !== 'object') {
    return null;
  }

  // Get provider configuration from the agent's own settings
  const providerName = agentConfig.provider || 'openrouter';
  const providerConfig = getProviderConfig(providerName);

  // Resolve model name (short name like 'nemotron' -> full ID); an alias
  // missing from the provider's model map is passed through unchanged.
  const modelAlias = agentConfig.model || 'nemotron';
  const modelFullId = providerConfig.models?.[modelAlias] || modelAlias;

  // Load prompt file
  const prompt = loadPrompt(agentConfig.prompt_file);

  return {
    role,
    provider: providerName,
    providerConfig,
    model: modelFullId,
    modelAlias,
    prompt,
    hyperparameters: agentConfig.hyperparameters || {},
    isConfigured: providerConfig.isConfigured,
  };
}
232
+
233
/**
 * Get the synthesis agent configuration (for multi-agent architectures).
 *
 * Thin convenience wrapper: the synthesis agent is stored under the
 * `synthesis` key of the profile and is resolved exactly like any other role.
 *
 * @param {string} profileName - Optional profile name
 * @returns {Object} Synthesis agent configuration, as returned by getAgentConfig
 */
export function getSynthesisConfig(profileName = null) {
  const synthesisRole = 'synthesis';
  return getAgentConfig(synthesisRole, profileName);
}
243
+
244
/**
 * Summarize every profile defined in the loaded config.
 *
 * Each summary carries the profile name, its description, the dialogue
 * settings (defaulting to disabled / 0 rounds) and one entry per agent role
 * reported by getProfileAgentRoles, with that role's provider and model.
 *
 * @returns {Array} Array of profile summaries
 */
export function listProfiles() {
  const profiles = loadConfig().profiles || {};
  const summaries = [];

  for (const [name, profile] of Object.entries(profiles)) {
    const roles = getProfileAgentRoles(name);

    const agents = roles.map((role) => {
      const settings = profile[role];
      return {
        role,
        provider: settings?.provider,
        model: settings?.model,
      };
    });

    summaries.push({
      name,
      description: profile.description || '',
      dialogueEnabled: profile.dialogue?.enabled ?? false,
      maxRounds: profile.dialogue?.max_rounds ?? 0,
      agents,
    });
  }

  return summaries;
}
267
+
268
/**
 * Collect the agent role names defined on a profile.
 *
 * A key counts as an agent role when it is not one of the reserved metadata
 * keys and its value is an object with at least one of `provider`, `model`
 * or `prompt_file` set.
 *
 * @param {string} profileName - Profile name
 * @returns {Array<string>} List of agent role names (excluding 'synthesis')
 */
export function getProfileAgentRoles(profileName = null) {
  const profile = getActiveProfile(profileName);

  // Profile keys that never describe a regular agent
  const reserved = new Set(['name', 'description', 'dialogue', 'synthesis']);

  const roles = [];
  for (const key of Object.keys(profile)) {
    if (reserved.has(key)) {
      continue;
    }

    const entry = profile[key];
    if (!entry || typeof entry !== 'object') {
      continue;
    }

    if (entry.provider || entry.model || entry.prompt_file) {
      roles.push(key);
    }
  }

  return roles;
}
288
+
289
/**
 * Check if profile has synthesis agent.
 *
 * A profile has a synthesis agent when its `synthesis` entry is an object
 * with a non-empty `prompt_file`.
 *
 * @param {string} profileName - Profile name
 * @returns {boolean} True if profile has synthesis agent
 */
export function profileHasSynthesis(profileName = null) {
  const { synthesis } = getActiveProfile(profileName);

  // Coerce explicitly: the bare && chain would leak undefined or the
  // prompt_file string instead of the documented boolean.
  return Boolean(synthesis && typeof synthesis === 'object' && synthesis.prompt_file);
}
298
+
299
/**
 * Summarize every persona defined in the loaded config.
 *
 * @returns {Array} Array of persona summaries
 *   ({ id, name, description, defaultArchitecture })
 */
export function listPersonas() {
  const personas = loadConfig().personas || {};
  const summaries = [];

  for (const id of Object.keys(personas)) {
    const persona = personas[id];
    summaries.push({
      id,
      name: persona.name,
      description: persona.description,
      defaultArchitecture: persona.default_architecture,
    });
  }

  return summaries;
}
314
+
315
/**
 * Summarize every architecture defined in the loaded config.
 *
 * `agentCount` is the number of keys under the architecture's `agents` map
 * (0 when absent); `hasSynthesis` reports whether a `synthesis` entry is set.
 *
 * @returns {Array} Array of architecture summaries
 */
export function listArchitectures() {
  const architectures = loadConfig().architectures || {};
  const summaries = [];

  for (const id of Object.keys(architectures)) {
    const arch = architectures[id];
    const agentNames = Object.keys(arch.agents || {});

    summaries.push({
      id,
      name: arch.name,
      description: arch.description,
      agentCount: agentNames.length,
      hasSynthesis: Boolean(arch.synthesis),
    });
  }

  return summaries;
}
331
+
332
/**
 * Get the logging settings.
 *
 * Returns the `logging` section of the loaded config as-is when present;
 * otherwise returns the built-in defaults. The two are not merged.
 *
 * @returns {Object} Logging settings
 */
export function getLoggingConfig() {
  const configured = loadConfig().logging;
  if (configured) {
    return configured;
  }

  return {
    log_deliberation: true,
    log_path: 'logs/learner-agents',
    include_internal_state: true,
  };
}
344
+
345
/**
 * Get the evaluation settings.
 *
 * Returns the `evaluation` section of the loaded config as-is when present;
 * otherwise returns the built-in defaults. The two are not merged.
 *
 * @returns {Object} Evaluation settings
 */
export function getEvaluationConfig() {
  const configured = loadConfig().evaluation;
  if (configured) {
    return configured;
  }

  return {
    track_emotional_state: true,
    track_understanding_level: true,
    track_memory_changes: true,
  };
}
357
+
358
+ /**
359
+ * Resolve a model reference to its provider configuration and model ID.
360
+ * Built by passing this module's getProviderConfig to createBoundResolver from the shared modelResolver; the resolution logic itself lives there.
361
+ *
362
+ * @param {string|Object} ref - Model reference ("provider.model" or { provider, model })
363
+ * @returns {Object} { provider, model, apiKey, isConfigured, baseUrl } (shape is defined by the shared resolver and is not visible in this file - confirm there)
364
+ */
365
+ export const resolveModel = createBoundResolver(getProviderConfig);
366
+
367
/**
 * Aggregate of this module's public functions, provided as the default
 * export so callers can use either named imports or a single namespace
 * object.
 */
const learnerConfigLoader = {
  loadConfig,
  loadProviders,
  getActiveProfile,
  getProviderConfig,
  getArchitecture,
  getPersona,
  getAgentConfig,
  getSynthesisConfig,
  getProfileAgentRoles,
  profileHasSynthesis,
  loadPrompt,
  resolveModel,
  listProfiles,
  listPersonas,
  listArchitectures,
  getLoggingConfig,
  getEvaluationConfig,
};

export default learnerConfigLoader;