@machinespirits/eval 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/components/MobileEvalDashboard.tsx +267 -0
- package/components/comparison/DeltaAnalysisTable.tsx +137 -0
- package/components/comparison/ProfileComparisonCard.tsx +176 -0
- package/components/comparison/RecognitionABMode.tsx +385 -0
- package/components/comparison/RecognitionMetricsPanel.tsx +135 -0
- package/components/comparison/WinnerIndicator.tsx +64 -0
- package/components/comparison/index.ts +5 -0
- package/components/mobile/BottomSheet.tsx +233 -0
- package/components/mobile/DimensionBreakdown.tsx +210 -0
- package/components/mobile/DocsView.tsx +363 -0
- package/components/mobile/LogsView.tsx +481 -0
- package/components/mobile/PsychodynamicQuadrant.tsx +261 -0
- package/components/mobile/QuickTestView.tsx +1098 -0
- package/components/mobile/RecognitionTypeChart.tsx +124 -0
- package/components/mobile/RecognitionView.tsx +809 -0
- package/components/mobile/RunDetailView.tsx +261 -0
- package/components/mobile/RunHistoryView.tsx +367 -0
- package/components/mobile/ScoreRadial.tsx +211 -0
- package/components/mobile/StreamingLogPanel.tsx +230 -0
- package/components/mobile/SynthesisStrategyChart.tsx +140 -0
- package/config/interaction-eval-scenarios.yaml +832 -0
- package/config/learner-agents.yaml +248 -0
- package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +52 -0
- package/docs/research/ABLATION-MODEL-SELECTION.md +53 -0
- package/docs/research/ADVANCED-EVAL-ANALYSIS.md +60 -0
- package/docs/research/ANOVA-RESULTS-2026-01-14.md +257 -0
- package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +586 -0
- package/docs/research/COST-ANALYSIS.md +56 -0
- package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +340 -0
- package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +291 -0
- package/docs/research/EVAL-SYSTEM-ANALYSIS.md +306 -0
- package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +301 -0
- package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +1988 -0
- package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +282 -0
- package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +147 -0
- package/docs/research/PAPER-EXTENSION-DYADIC.md +204 -0
- package/docs/research/PAPER-UNIFIED.md +659 -0
- package/docs/research/PAPER-UNIFIED.pdf +0 -0
- package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +356 -0
- package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +419 -0
- package/docs/research/apa.csl +2133 -0
- package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +1637 -0
- package/docs/research/archive/paper-multiagent-tutor.tex +978 -0
- package/docs/research/paper-draft/full-paper.md +136 -0
- package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
- package/docs/research/paper-draft/references.bib +515 -0
- package/docs/research/transcript-baseline.md +139 -0
- package/docs/research/transcript-recognition-multiagent.md +187 -0
- package/hooks/useEvalData.ts +625 -0
- package/index.js +27 -0
- package/package.json +73 -0
- package/routes/evalRoutes.js +3002 -0
- package/scripts/advanced-eval-analysis.js +351 -0
- package/scripts/analyze-eval-costs.js +378 -0
- package/scripts/analyze-eval-results.js +513 -0
- package/scripts/analyze-interaction-evals.js +368 -0
- package/server-init.js +45 -0
- package/server.js +162 -0
- package/services/benchmarkService.js +1892 -0
- package/services/evaluationRunner.js +739 -0
- package/services/evaluationStore.js +1121 -0
- package/services/learnerConfigLoader.js +385 -0
- package/services/learnerTutorInteractionEngine.js +857 -0
- package/services/memory/learnerMemoryService.js +1227 -0
- package/services/memory/learnerWritingPad.js +577 -0
- package/services/memory/tutorWritingPad.js +674 -0
- package/services/promptRecommendationService.js +493 -0
- package/services/rubricEvaluator.js +826 -0
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Learner Configuration Loader
|
|
3
|
+
*
|
|
4
|
+
* Loads and manages synthetic learner configuration from YAML files.
|
|
5
|
+
* Supports environment variable overrides and multiple profiles.
|
|
6
|
+
*
|
|
7
|
+
* Uses shared configLoaderBase.js for common loading patterns.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { configLoaderBase, modelResolver } from '@machinespirits/tutor-core';
|
|
11
|
+
// Pull the individual helpers off the two tutor-core namespaces once, at module load.
const loadProviders = configLoaderBase.loadProviders;
const createConfigLoader = configLoaderBase.createConfigLoader;
const createPromptLoader = configLoaderBase.createPromptLoader;
const createBoundResolver = modelResolver.createBoundResolver;
|
|
13
|
+
|
|
14
|
+
// ============================================================================
|
|
15
|
+
// Default Configurations
|
|
16
|
+
// ============================================================================
|
|
17
|
+
|
|
18
|
+
/**
 * Build the built-in provider table embedded in the default configuration.
 *
 * @returns {Object} Provider map keyed by provider name; only `openrouter` is defined here.
 */
function getDefaultProviders() {
  // The same model ID serves as the provider default and as the `nemotron` alias.
  const nemotronId = 'nvidia/nemotron-3-nano-30b-a3b:free';

  const models = {
    nemotron: nemotronId,
    sonnet: 'anthropic/claude-sonnet-4.5',
    haiku: 'anthropic/claude-haiku-4.5',
  };

  const openrouter = {
    api_key_env: 'OPENROUTER_API_KEY',
    base_url: 'https://openrouter.ai/api/v1/chat/completions',
    default_model: nemotronId,
    models,
  };

  return { openrouter };
}
|
|
32
|
+
|
|
33
|
+
/**
 * Assemble the complete built-in configuration from the individual default builders.
 *
 * The result contains one provider table, one architecture (`unified`), one
 * persona (`eager_novice`) and one profile (`unified`), with `unified` marked
 * as the active profile.
 *
 * @returns {Object} A freshly built default configuration object.
 */
function getDefaultConfig() {
  const defaultKey = 'unified';

  const providers = getDefaultProviders();
  const architectures = { [defaultKey]: getDefaultArchitecture() };
  const personas = { eager_novice: getDefaultPersona() };
  const profiles = { [defaultKey]: getDefaultProfile() };

  return {
    active_profile: defaultKey,
    providers,
    architectures,
    personas,
    profiles,
  };
}
|
|
48
|
+
|
|
49
|
+
/**
 * Build the built-in `unified` learner profile.
 *
 * @returns {Object} Profile with description, architecture, provider, model alias and dialogue settings.
 */
function getDefaultProfile() {
  // Dialogue is disabled in the default profile, so no rounds are configured.
  const dialogue = { enabled: false, max_rounds: 0 };

  return {
    description: 'Single unified learner agent',
    architecture: 'unified',
    provider: 'openrouter',
    model: 'nemotron',
    dialogue,
  };
}
|
|
61
|
+
|
|
62
|
+
/**
 * Build the built-in `unified` architecture: a single `unified_learner` agent.
 *
 * @returns {Object} Architecture with a name, a description and an `agents` map.
 */
function getDefaultArchitecture() {
  const unifiedLearner = {
    role: 'unified_learner',
    prompt_file: 'learner-unified.md',
    hyperparameters: { temperature: 0.7, max_tokens: 300 },
  };

  return {
    name: 'Unified Learner',
    description: 'Single agent representing the whole learner',
    agents: { unified_learner: unifiedLearner },
  };
}
|
|
78
|
+
|
|
79
|
+
/**
 * Build the built-in `eager_novice` persona.
 *
 * @returns {Object} Persona with a display name, description, default architecture and trait levels.
 */
function getDefaultPersona() {
  const traits = {
    frustration_threshold: 'low',
    persistence: 'medium',
    prior_knowledge: 'minimal',
    self_confidence: 'low',
  };

  return {
    name: 'Eager Novice',
    description: 'Enthusiastic but easily overwhelmed',
    default_architecture: 'unified',
    traits,
  };
}
|
|
92
|
+
|
|
93
|
+
/**
 * Produce a built-in system prompt for a learner agent prompt file.
 *
 * The role key is derived from the file name by removing the first `learner-`
 * and the first `.md` it contains (e.g. `learner-desire.md` -> `desire`).
 *
 * @param {string} filename - Prompt file name, e.g. `learner-unified.md`.
 * @returns {string} The built-in prompt for the derived role, or a generic
 *   prompt that names the file when no built-in prompt exists for it.
 */
function getDefaultPrompt(filename) {
  // A missing or non-string file name cannot match any role; use the generic
  // prompt instead of throwing on `.replace`.
  const role = typeof filename === 'string'
    ? filename.replace('learner-', '').replace('.md', '')
    : '';

  const defaults = {
    'unified': `You are simulating a learner's internal experience. Respond authentically to the tutor's message, showing genuine reactions including confusion, insight, frustration, or understanding.`,
    'desire': `You represent the DESIRE dimension of a learner. Express immediate wants, frustrations, and emotional reactions.`,
    'intellect': `You represent the INTELLECT dimension of a learner. Process information rationally, identify what makes sense and what doesn't.`,
    'aspiration': `You represent the ASPIRATION dimension of a learner. Express goals, standards, and desire for mastery.`,
    'thesis': `You represent the learner's CURRENT UNDERSTANDING. State what you currently believe or understand about the topic.`,
    'antithesis': `You represent the learner's DOUBT. Challenge the current understanding, raise questions and objections.`,
    'synthesis-dialectical': `You represent the learner's EMERGING UNDERSTANDING. Integrate the thesis and antithesis into a new perspective.`,
    'synthesis': `Synthesize the internal voices into a coherent external response that the learner would actually say.`,
    'novice': `You are the NOVICE voice. Express confusion, basic questions, and unfamiliarity with the material.`,
    'practitioner': `You are the PRACTITIONER voice. Apply prior knowledge, make connections, show developing competence.`,
    'expert': `You are the EXPERT voice. Show deep understanding, make sophisticated connections, identify nuance.`,
  };

  // Own-property check: a bare `defaults[role]` lookup would also find inherited
  // members such as `constructor` or `toString` and return a non-string value.
  if (Object.prototype.hasOwnProperty.call(defaults, role)) {
    return defaults[role];
  }

  return `You are simulating part of a learner's internal experience. (Prompt file ${filename} not found)`;
}
|
|
112
|
+
|
|
113
|
+
// ============================================================================
|
|
114
|
+
// Create Base Loaders
|
|
115
|
+
// ============================================================================
|
|
116
|
+
|
|
117
|
+
const configLoader = createConfigLoader('learner-agents.yaml', getDefaultConfig);
|
|
118
|
+
const promptLoader = createPromptLoader(getDefaultPrompt);
|
|
119
|
+
|
|
120
|
+
// Re-export loadConfig and getProviderConfig from the base loader
|
|
121
|
+
export const loadConfig = configLoader.loadConfig;
|
|
122
|
+
export const getProviderConfig = configLoader.getProviderConfig;
|
|
123
|
+
|
|
124
|
+
// Re-export loadProviders from base
|
|
125
|
+
export { loadProviders };
|
|
126
|
+
|
|
127
|
+
// Re-export prompt loading utilities
|
|
128
|
+
export const loadPrompt = promptLoader.loadPrompt;
|
|
129
|
+
|
|
130
|
+
// ============================================================================
|
|
131
|
+
// Learner-Specific Functions
|
|
132
|
+
// ============================================================================
|
|
133
|
+
|
|
134
|
+
/**
 * Resolve the learner profile to use.
 *
 * Precedence for the profile name: the explicit `profileName` argument, then
 * the `LEARNER_PROFILE` or `LEARNER_AGENT_PROFILE` environment variable, then
 * `active_profile` from the loaded config, then `'unified'`.
 *
 * If the resolved name is not present in `config.profiles`, a warning is
 * logged and the `unified` profile (from config, or the built-in default) is
 * returned instead.
 *
 * @param {string} profileName - Optional profile name override
 * @returns {Object} Profile settings with a `name` field; a `name` key inside
 *   the profile itself takes precedence over the resolved name.
 */
export function getActiveProfile(profileName = null) {
  const config = loadConfig();

  // Check for environment variable override
  const envProfile = process.env.LEARNER_PROFILE || process.env.LEARNER_AGENT_PROFILE;
  const targetProfile = profileName || envProfile || config.active_profile || 'unified';

  const profile = config.profiles?.[targetProfile];
  if (profile) {
    return {
      name: targetProfile,
      ...profile,
    };
  }

  console.warn(`Learner profile "${targetProfile}" not found, using unified`);

  // Give the fallback the same shape as the normal path: callers get a `name`
  // either way, and it names the profile that was actually returned.
  const fallback = config.profiles?.unified || getDefaultProfile();
  return {
    name: 'unified',
    ...fallback,
  };
}
|
|
157
|
+
|
|
158
|
+
/**
 * Look up an architecture definition by name.
 *
 * When the name is not present in the loaded config, a warning is logged and
 * the `unified` architecture (from config, or the built-in default) is returned.
 *
 * @param {string} architectureName - Architecture name (unified, psychodynamic, dialectical, cognitive)
 * @returns {Object} Architecture configuration with agents
 */
export function getArchitecture(architectureName) {
  const { architectures } = loadConfig();

  const requested = architectures?.[architectureName];
  if (requested) {
    return requested;
  }

  console.warn(`Architecture "${architectureName}" not found, using unified`);
  return architectures?.unified || getDefaultArchitecture();
}
|
|
174
|
+
|
|
175
|
+
/**
 * Look up a persona definition by identifier.
 *
 * When the identifier is not present in the loaded config, a warning is logged
 * and the `eager_novice` persona (from config, or the built-in default) is
 * returned instead.
 *
 * @param {string} personaId - Persona identifier
 * @returns {Object} Persona settings with an `id` field; an `id` key inside
 *   the persona itself takes precedence over the looked-up identifier.
 */
export function getPersona(personaId) {
  const config = loadConfig();

  const persona = config.personas?.[personaId];
  if (persona) {
    return {
      id: personaId,
      ...persona,
    };
  }

  console.warn(`Persona "${personaId}" not found, using eager_novice`);

  // Give the fallback the same shape as the normal path: callers get an `id`
  // either way, and it identifies the persona that was actually returned.
  const fallback = config.personas?.eager_novice || getDefaultPersona();
  return {
    id: 'eager_novice',
    ...fallback,
  };
}
|
|
194
|
+
|
|
195
|
+
/**
 * Get agent configuration for a specific role.
 *
 * Reads the per-agent entry stored on the profile under the key `role`
 * (the original note says this mirrors tutorConfigLoader.getAgentConfig),
 * then resolves its provider, model ID and prompt text.
 *
 * @param {string} role - Agent role (e.g., 'desire', 'intellect', 'unified_learner', 'synthesis')
 * @param {string} profileName - Optional profile name
 * @returns {Object|null} Complete agent configuration, or null when the profile
 *   has no object-valued entry for `role`.
 */
export function getAgentConfig(role, profileName = null) {
  const profile = getActiveProfile(profileName);
  const agentConfig = profile[role];

  // Only an object can be an agent entry. Scalar profile fields such as `name`,
  // `description` or `provider` are truthy as well and must not be turned into
  // a default-filled agent config.
  if (!agentConfig || typeof agentConfig !== 'object') {
    return null;
  }

  // Get provider configuration from the agent's own settings
  const providerName = agentConfig.provider || 'openrouter';
  const providerConfig = getProviderConfig(providerName);

  // Resolve model name (short name like 'nemotron' -> full ID); an alias that
  // is not in the provider's model table is used verbatim as the model ID.
  const modelAlias = agentConfig.model || 'nemotron';
  const modelFullId = providerConfig.models?.[modelAlias] || modelAlias;

  // Load prompt file
  const prompt = loadPrompt(agentConfig.prompt_file);

  return {
    role,
    provider: providerName,
    providerConfig,
    model: modelFullId,
    modelAlias,
    prompt,
    hyperparameters: agentConfig.hyperparameters || {},
    isConfigured: providerConfig.isConfigured,
  };
}
|
|
232
|
+
|
|
233
|
+
/**
 * Get the synthesis agent configuration (used by multi-agent architectures).
 *
 * The synthesis agent is stored on the profile like any other agent, under the
 * `synthesis` key, so this is getAgentConfig with that role.
 *
 * @param {string} profileName - Optional profile name
 * @returns {Object} Synthesis agent configuration, as returned by getAgentConfig
 */
export function getSynthesisConfig(profileName = null) {
  const synthesisRole = 'synthesis';
  return getAgentConfig(synthesisRole, profileName);
}
|
|
243
|
+
|
|
244
|
+
/**
 * Summarize every profile in the loaded config.
 *
 * @returns {Array} One entry per profile: name, description, dialogue settings
 *   and the provider/model of each agent role found on the profile.
 */
export function listProfiles() {
  const allProfiles = loadConfig().profiles || {};
  const summaries = [];

  for (const [name, profile] of Object.entries(allProfiles)) {
    // Resolve the roles first; the lookup goes through getActiveProfile by name.
    const roles = getProfileAgentRoles(name);

    summaries.push({
      name,
      description: profile.description || '',
      dialogueEnabled: profile.dialogue?.enabled ?? false,
      maxRounds: profile.dialogue?.max_rounds ?? 0,
      agents: roles.map((role) => {
        const settings = profile[role];
        return {
          role,
          provider: settings?.provider,
          model: settings?.model,
        };
      }),
    });
  }

  return summaries;
}
|
|
267
|
+
|
|
268
|
+
/**
 * Get the list of agent roles defined in a profile.
 *
 * A profile key counts as an agent role when it is not one of the reserved
 * keys and its value is an object carrying a truthy `provider`, `model` or
 * `prompt_file`.
 *
 * @param {string} profileName - Profile name
 * @returns {Array<string>} List of agent role names (excluding 'synthesis')
 */
export function getProfileAgentRoles(profileName = null) {
  const profile = getActiveProfile(profileName);

  // Keys that are never agent roles, whatever their value looks like.
  const nonAgentKeys = new Set(['name', 'description', 'dialogue', 'synthesis']);

  const roles = [];
  for (const [key, value] of Object.entries(profile)) {
    if (nonAgentKeys.has(key)) {
      continue;
    }

    // Scalar fields (e.g. a profile-level `provider` string) fail the object test.
    const isObject = value && typeof value === 'object';
    if (isObject && (value.provider || value.model || value.prompt_file)) {
      roles.push(key);
    }
  }

  return roles;
}
|
|
288
|
+
|
|
289
|
+
/**
 * Check if profile has synthesis agent.
 *
 * A profile has a synthesis agent when its `synthesis` entry is an object with
 * a truthy `prompt_file`.
 *
 * @param {string} profileName - Profile name
 * @returns {boolean} True if profile has synthesis agent
 */
export function profileHasSynthesis(profileName = null) {
  const { synthesis } = getActiveProfile(profileName);

  // Coerce explicitly: the bare `&&` chain yields undefined, false or the
  // prompt file name rather than the documented boolean.
  return Boolean(synthesis && typeof synthesis === 'object' && synthesis.prompt_file);
}
|
|
298
|
+
|
|
299
|
+
/**
 * Summarize every persona in the loaded config.
 *
 * @returns {Array} One entry per persona: id, name, description and default architecture.
 */
export function listPersonas() {
  const allPersonas = loadConfig().personas || {};
  const summaries = [];

  for (const [id, persona] of Object.entries(allPersonas)) {
    const { name, description, default_architecture: defaultArchitecture } = persona;
    summaries.push({ id, name, description, defaultArchitecture });
  }

  return summaries;
}
|
|
314
|
+
|
|
315
|
+
/**
 * Summarize every architecture in the loaded config.
 *
 * @returns {Array} One entry per architecture: id, name, description, number
 *   of agents, and whether a `synthesis` entry is present.
 */
export function listArchitectures() {
  const allArchitectures = loadConfig().architectures || {};
  const summaries = [];

  for (const [id, arch] of Object.entries(allArchitectures)) {
    const agentNames = Object.keys(arch.agents || {});
    summaries.push({
      id,
      name: arch.name,
      description: arch.description,
      agentCount: agentNames.length,
      hasSynthesis: Boolean(arch.synthesis),
    });
  }

  return summaries;
}
|
|
331
|
+
|
|
332
|
+
/**
 * Get logging configuration.
 *
 * Returns the `logging` section of the loaded config as-is when it is set;
 * the built-in defaults are used only when the section is absent, and are not
 * merged into a partial section.
 *
 * @returns {Object} Logging settings
 */
export function getLoggingConfig() {
  const { logging } = loadConfig();
  if (logging) {
    return logging;
  }

  return {
    log_deliberation: true,
    log_path: 'logs/learner-agents',
    include_internal_state: true,
  };
}
|
|
344
|
+
|
|
345
|
+
/**
 * Get evaluation configuration.
 *
 * Returns the `evaluation` section of the loaded config as-is when it is set;
 * the built-in defaults are used only when the section is absent, and are not
 * merged into a partial section.
 *
 * @returns {Object} Evaluation settings
 */
export function getEvaluationConfig() {
  const { evaluation } = loadConfig();
  if (evaluation) {
    return evaluation;
  }

  return {
    track_emotional_state: true,
    track_understanding_level: true,
    track_memory_changes: true,
  };
}
|
|
357
|
+
|
|
358
|
+
/**
 * Resolve a model reference to a provider configuration and model ID.
 *
 * This is the shared resolver from tutor-core's modelResolver, bound to this
 * module's getProviderConfig so that lookups use the learner provider table.
 *
 * @param {string|Object} ref - Model reference ("provider.model" or { provider, model })
 * @returns {Object} { provider, model, apiKey, isConfigured, baseUrl }
 *   (shape as stated by the original documentation; the resolver itself lives
 *   in tutor-core — confirm there)
 */
export const resolveModel = createBoundResolver(getProviderConfig);
|
|
366
|
+
|
|
367
|
+
// Default export: the same functions as the named exports, gathered into one
// object for callers that import the module as a single namespace.
const learnerConfigLoader = {
  loadConfig,
  loadProviders,
  getActiveProfile,
  getProviderConfig,
  getArchitecture,
  getPersona,
  getAgentConfig,
  getSynthesisConfig,
  getProfileAgentRoles,
  profileHasSynthesis,
  loadPrompt,
  resolveModel,
  listProfiles,
  listPersonas,
  listArchitectures,
  getLoggingConfig,
  getEvaluationConfig,
};

export default learnerConfigLoader;
|