@machinespirits/eval 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/LICENSE +21 -0
  2. package/README.md +161 -0
  3. package/config/eval-settings.yaml +18 -0
  4. package/config/evaluation-rubric-learner.yaml +277 -0
  5. package/config/evaluation-rubric.yaml +613 -0
  6. package/config/interaction-eval-scenarios.yaml +93 -50
  7. package/config/learner-agents.yaml +124 -193
  8. package/config/machinespirits-eval.code-workspace +11 -0
  9. package/config/providers.yaml +60 -0
  10. package/config/suggestion-scenarios.yaml +1399 -0
  11. package/config/tutor-agents.yaml +716 -0
  12. package/docs/EVALUATION-VARIABLES.md +589 -0
  13. package/docs/REPLICATION-PLAN.md +577 -0
  14. package/index.js +15 -6
  15. package/package.json +16 -22
  16. package/routes/evalRoutes.js +88 -36
  17. package/scripts/analyze-judge-reliability.js +401 -0
  18. package/scripts/analyze-run.js +97 -0
  19. package/scripts/analyze-run.mjs +282 -0
  20. package/scripts/analyze-validation-failures.js +141 -0
  21. package/scripts/check-run.mjs +17 -0
  22. package/scripts/code-impasse-strategies.js +1132 -0
  23. package/scripts/compare-runs.js +44 -0
  24. package/scripts/compare-suggestions.js +80 -0
  25. package/scripts/compare-transformation.js +116 -0
  26. package/scripts/dig-into-run.js +158 -0
  27. package/scripts/eval-cli.js +2626 -0
  28. package/scripts/generate-paper-figures.py +452 -0
  29. package/scripts/qualitative-analysis-ai.js +1313 -0
  30. package/scripts/qualitative-analysis.js +688 -0
  31. package/scripts/seed-db.js +87 -0
  32. package/scripts/show-failed-suggestions.js +64 -0
  33. package/scripts/validate-content.js +192 -0
  34. package/server.js +3 -2
  35. package/services/__tests__/evalConfigLoader.test.js +338 -0
  36. package/services/anovaStats.js +499 -0
  37. package/services/contentResolver.js +407 -0
  38. package/services/dialogueTraceAnalyzer.js +454 -0
  39. package/services/evalConfigLoader.js +625 -0
  40. package/services/evaluationRunner.js +2171 -270
  41. package/services/evaluationStore.js +564 -29
  42. package/services/learnerConfigLoader.js +75 -5
  43. package/services/learnerRubricEvaluator.js +284 -0
  44. package/services/learnerTutorInteractionEngine.js +375 -0
  45. package/services/processUtils.js +18 -0
  46. package/services/progressLogger.js +98 -0
  47. package/services/promptRecommendationService.js +31 -26
  48. package/services/promptRewriter.js +427 -0
  49. package/services/rubricEvaluator.js +543 -70
  50. package/services/streamingReporter.js +104 -0
  51. package/services/turnComparisonAnalyzer.js +494 -0
  52. package/components/MobileEvalDashboard.tsx +0 -267
  53. package/components/comparison/DeltaAnalysisTable.tsx +0 -137
  54. package/components/comparison/ProfileComparisonCard.tsx +0 -176
  55. package/components/comparison/RecognitionABMode.tsx +0 -385
  56. package/components/comparison/RecognitionMetricsPanel.tsx +0 -135
  57. package/components/comparison/WinnerIndicator.tsx +0 -64
  58. package/components/comparison/index.ts +0 -5
  59. package/components/mobile/BottomSheet.tsx +0 -233
  60. package/components/mobile/DimensionBreakdown.tsx +0 -210
  61. package/components/mobile/DocsView.tsx +0 -363
  62. package/components/mobile/LogsView.tsx +0 -481
  63. package/components/mobile/PsychodynamicQuadrant.tsx +0 -261
  64. package/components/mobile/QuickTestView.tsx +0 -1098
  65. package/components/mobile/RecognitionTypeChart.tsx +0 -124
  66. package/components/mobile/RecognitionView.tsx +0 -809
  67. package/components/mobile/RunDetailView.tsx +0 -261
  68. package/components/mobile/RunHistoryView.tsx +0 -367
  69. package/components/mobile/ScoreRadial.tsx +0 -211
  70. package/components/mobile/StreamingLogPanel.tsx +0 -230
  71. package/components/mobile/SynthesisStrategyChart.tsx +0 -140
  72. package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +0 -52
  73. package/docs/research/ABLATION-MODEL-SELECTION.md +0 -53
  74. package/docs/research/ADVANCED-EVAL-ANALYSIS.md +0 -60
  75. package/docs/research/ANOVA-RESULTS-2026-01-14.md +0 -257
  76. package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +0 -586
  77. package/docs/research/COST-ANALYSIS.md +0 -56
  78. package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +0 -340
  79. package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +0 -291
  80. package/docs/research/EVAL-SYSTEM-ANALYSIS.md +0 -306
  81. package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +0 -301
  82. package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +0 -1988
  83. package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +0 -282
  84. package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +0 -147
  85. package/docs/research/PAPER-EXTENSION-DYADIC.md +0 -204
  86. package/docs/research/PAPER-UNIFIED.md +0 -659
  87. package/docs/research/PAPER-UNIFIED.pdf +0 -0
  88. package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +0 -356
  89. package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +0 -419
  90. package/docs/research/apa.csl +0 -2133
  91. package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +0 -1637
  92. package/docs/research/archive/paper-multiagent-tutor.tex +0 -978
  93. package/docs/research/paper-draft/full-paper.md +0 -136
  94. package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
  95. package/docs/research/paper-draft/references.bib +0 -515
  96. package/docs/research/transcript-baseline.md +0 -139
  97. package/docs/research/transcript-recognition-multiagent.md +0 -187
  98. package/hooks/useEvalData.ts +0 -625
  99. package/server-init.js +0 -45
  100. package/services/benchmarkService.js +0 -1892
  101. package/types.ts +0 -165
  102. package/utils/haptics.ts +0 -45
@@ -9,19 +9,30 @@ import Database from 'better-sqlite3';
9
9
  import path from 'path';
10
10
  import { fileURLToPath } from 'url';
11
11
  import { randomBytes } from 'crypto';
12
+ import { isPidAlive } from './processUtils.js';
12
13
 
13
14
  const __filename = fileURLToPath(import.meta.url);
14
15
  const __dirname = path.dirname(__filename);
15
16
  const ROOT_DIR = path.resolve(__dirname, '..');
16
17
  const DATA_DIR = path.join(ROOT_DIR, 'data');
17
18
 
18
- // Initialize database
19
- const dbPath = path.join(DATA_DIR, 'evaluations.db');
19
+ // Initialize database — override with EVAL_DB_PATH env var for test isolation
20
+ const dbPath = process.env.EVAL_DB_PATH || path.join(DATA_DIR, 'evaluations.db');
20
21
  const db = new Database(dbPath);
21
22
 
22
23
  // Enable WAL mode for better concurrent access
23
24
  db.pragma('journal_mode = WAL');
24
25
 
26
+ // Migrate: rename evaluator_model → judge_model if the old column exists
27
+ try {
28
+ const cols = db.prepare('PRAGMA table_info(evaluation_results)').all().map(c => c.name);
29
+ if (cols.includes('evaluator_model') && !cols.includes('judge_model')) {
30
+ db.exec('ALTER TABLE evaluation_results RENAME COLUMN evaluator_model TO judge_model');
31
+ }
32
+ } catch (e) {
33
+ // Table may not exist yet (first run)
34
+ }
35
+
25
36
  // Create tables
26
37
  db.exec(`
27
38
  -- Evaluation runs (batches of tests)
@@ -81,7 +92,7 @@ db.exec(`
81
92
 
82
93
  -- Metadata
83
94
  created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
84
- evaluator_model TEXT,
95
+ judge_model TEXT,
85
96
  evaluation_reasoning TEXT,
86
97
  success BOOLEAN DEFAULT 1,
87
98
  error_message TEXT
@@ -103,6 +114,13 @@ try {
103
114
  }
104
115
  db.exec(`CREATE INDEX IF NOT EXISTS idx_results_dialogue ON evaluation_results(dialogue_id)`);
105
116
 
117
+ // Migration: Add scenario_type column if it doesn't exist
118
+ try {
119
+ db.exec(`ALTER TABLE evaluation_results ADD COLUMN scenario_type TEXT DEFAULT 'suggestion'`);
120
+ } catch (e) {
121
+ // Column already exists, ignore
122
+ }
123
+
106
124
  // Migration: Add scores_with_reasoning column if it doesn't exist
107
125
  try {
108
126
  db.exec(`ALTER TABLE evaluation_results ADD COLUMN scores_with_reasoning TEXT`);
@@ -117,6 +135,66 @@ try {
117
135
  // Column already exists, ignore
118
136
  }
119
137
 
138
+ // Migration: Add dual scoring columns if they don't exist
139
+ try {
140
+ db.exec(`ALTER TABLE evaluation_results ADD COLUMN base_score REAL`);
141
+ } catch (e) {
142
+ // Column already exists, ignore
143
+ }
144
+ try {
145
+ db.exec(`ALTER TABLE evaluation_results ADD COLUMN recognition_score REAL`);
146
+ } catch (e) {
147
+ // Column already exists, ignore
148
+ }
149
+
150
+ // Migration: Add ego_model and superego_model columns
151
+ try {
152
+ db.exec(`ALTER TABLE evaluation_results ADD COLUMN ego_model TEXT`);
153
+ } catch (e) {
154
+ // Column already exists, ignore
155
+ }
156
+ try {
157
+ db.exec(`ALTER TABLE evaluation_results ADD COLUMN superego_model TEXT`);
158
+ } catch (e) {
159
+ // Column already exists, ignore
160
+ }
161
+
162
+ // Migration: Add factorial factor columns
163
+ try {
164
+ db.exec(`ALTER TABLE evaluation_results ADD COLUMN factor_recognition BOOLEAN`);
165
+ } catch (e) { /* Column already exists */ }
166
+ try {
167
+ db.exec(`ALTER TABLE evaluation_results ADD COLUMN factor_multi_agent_tutor BOOLEAN`);
168
+ } catch (e) { /* Column already exists */ }
169
+ try {
170
+ db.exec(`ALTER TABLE evaluation_results ADD COLUMN factor_multi_agent_learner BOOLEAN`);
171
+ } catch (e) { /* Column already exists */ }
172
+ try {
173
+ db.exec(`ALTER TABLE evaluation_results ADD COLUMN learner_architecture TEXT`);
174
+ } catch (e) { /* Column already exists */ }
175
+ try {
176
+ db.exec(`ALTER TABLE evaluation_results ADD COLUMN scoring_method TEXT`);
177
+ } catch (e) { /* Column already exists */ }
178
+
179
+ // Migration: Add learner-side evaluation columns to evaluation_results
180
+ try {
181
+ db.exec(`ALTER TABLE evaluation_results ADD COLUMN learner_scores TEXT`);
182
+ } catch (e) { /* Column already exists */ }
183
+ try {
184
+ db.exec(`ALTER TABLE evaluation_results ADD COLUMN learner_overall_score REAL`);
185
+ } catch (e) { /* Column already exists */ }
186
+ try {
187
+ db.exec(`ALTER TABLE evaluation_results ADD COLUMN learner_judge_model TEXT`);
188
+ } catch (e) { /* Column already exists */ }
189
+
190
+ // Migration: Add reproducibility metadata columns to evaluation_runs
191
+ try {
192
+ db.exec(`ALTER TABLE evaluation_runs ADD COLUMN git_commit TEXT`);
193
+ } catch (e) { /* Column already exists */ }
194
+ try {
195
+ db.exec(`ALTER TABLE evaluation_runs ADD COLUMN package_version TEXT`);
196
+ } catch (e) { /* Column already exists */ }
197
+
120
198
  // Migration: Revert any accidental renames (batch→matrix, interact→interaction)
121
199
  try {
122
200
  const revertRuns = db.prepare(`
@@ -181,6 +259,17 @@ db.exec(`
181
259
  CREATE INDEX IF NOT EXISTS idx_interaction_created ON interaction_evaluations(created_at);
182
260
  `);
183
261
 
262
+ // Migration: Add learner-side evaluation columns to interaction_evaluations
263
+ try {
264
+ db.exec(`ALTER TABLE interaction_evaluations ADD COLUMN learner_scores TEXT`);
265
+ } catch (e) { /* Column already exists */ }
266
+ try {
267
+ db.exec(`ALTER TABLE interaction_evaluations ADD COLUMN learner_overall_score REAL`);
268
+ } catch (e) { /* Column already exists */ }
269
+ try {
270
+ db.exec(`ALTER TABLE interaction_evaluations ADD COLUMN learner_judge_model TEXT`);
271
+ } catch (e) { /* Column already exists */ }
272
+
184
273
  /**
185
274
  * Generate a unique run ID
186
275
  */
@@ -205,13 +294,14 @@ export function createRun(options = {}) {
205
294
  } = options;
206
295
 
207
296
  const id = generateRunId();
297
+ const now = new Date().toISOString();
208
298
 
209
299
  const stmt = db.prepare(`
210
- INSERT INTO evaluation_runs (id, description, total_scenarios, total_configurations, metadata)
211
- VALUES (?, ?, ?, ?, ?)
300
+ INSERT INTO evaluation_runs (id, created_at, description, total_scenarios, total_configurations, metadata, git_commit, package_version)
301
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
212
302
  `);
213
303
 
214
- stmt.run(id, description, totalScenarios, totalConfigurations, JSON.stringify(metadata));
304
+ stmt.run(id, now, description, totalScenarios, totalConfigurations, JSON.stringify(metadata), metadata.gitCommit || null, metadata.packageVersion || null);
215
305
 
216
306
  return {
217
307
  id,
@@ -227,16 +317,29 @@ export function createRun(options = {}) {
227
317
  * Update a run's status
228
318
  */
229
319
  export function updateRun(runId, updates) {
230
- const { status, totalTests, completedAt } = updates;
320
+ const { status, totalTests, completedAt, metadata } = updates;
321
+
322
+ // If metadata provided, merge with existing
323
+ if (metadata) {
324
+ const existing = getRun(runId);
325
+ const mergedMetadata = { ...(existing?.metadata || {}), ...metadata };
326
+ const stmt = db.prepare(`UPDATE evaluation_runs SET metadata = ? WHERE id = ?`);
327
+ stmt.run(JSON.stringify(mergedMetadata), runId);
328
+ }
231
329
 
232
330
  if (status === 'completed') {
233
331
  const stmt = db.prepare(`
234
332
  UPDATE evaluation_runs
235
- SET status = ?, total_tests = ?, completed_at = ?
333
+ SET status = ?, completed_at = ?
236
334
  WHERE id = ?
237
335
  `);
238
- stmt.run(status, totalTests || 0, completedAt || new Date().toISOString(), runId);
239
- } else {
336
+ stmt.run(status, completedAt || new Date().toISOString(), runId);
337
+ } else if (status && totalTests != null) {
338
+ const stmt = db.prepare(`
339
+ UPDATE evaluation_runs SET status = ?, total_tests = ? WHERE id = ?
340
+ `);
341
+ stmt.run(status, totalTests, runId);
342
+ } else if (status) {
240
343
  const stmt = db.prepare(`
241
344
  UPDATE evaluation_runs SET status = ? WHERE id = ?
242
345
  `);
@@ -254,23 +357,33 @@ export function updateRun(runId, updates) {
254
357
  export function storeResult(runId, result) {
255
358
  const stmt = db.prepare(`
256
359
  INSERT INTO evaluation_results (
257
- run_id, scenario_id, scenario_name,
360
+ run_id, scenario_id, scenario_name, scenario_type,
258
361
  provider, model, profile_name, hyperparameters, prompt_id,
362
+ ego_model, superego_model,
259
363
  suggestions, raw_response,
260
364
  latency_ms, input_tokens, output_tokens, cost, dialogue_rounds, api_calls, dialogue_id,
261
365
  score_relevance, score_specificity, score_pedagogical,
262
366
  score_personalization, score_actionability, score_tone, overall_score,
367
+ base_score, recognition_score,
263
368
  passes_required, passes_forbidden, required_missing, forbidden_found,
264
- evaluator_model, evaluation_reasoning, scores_with_reasoning, success, error_message
369
+ judge_model, evaluation_reasoning, scores_with_reasoning, success, error_message,
370
+ factor_recognition, factor_multi_agent_tutor, factor_multi_agent_learner, learner_architecture,
371
+ scoring_method,
372
+ created_at
265
373
  ) VALUES (
266
- ?, ?, ?,
374
+ ?, ?, ?, ?,
267
375
  ?, ?, ?, ?, ?,
268
376
  ?, ?,
377
+ ?, ?,
269
378
  ?, ?, ?, ?, ?, ?, ?,
270
379
  ?, ?, ?,
271
380
  ?, ?, ?, ?,
381
+ ?, ?,
382
+ ?, ?, ?, ?,
383
+ ?, ?, ?, ?, ?,
272
384
  ?, ?, ?, ?,
273
- ?, ?, ?, ?, ?
385
+ ?,
386
+ ?
274
387
  )
275
388
  `);
276
389
 
@@ -278,11 +391,14 @@ export function storeResult(runId, result) {
278
391
  runId,
279
392
  result.scenarioId,
280
393
  result.scenarioName,
394
+ result.scenarioType || 'suggestion',
281
395
  result.provider,
282
396
  result.model,
283
397
  result.profileName,
284
398
  JSON.stringify(result.hyperparameters || {}),
285
399
  result.promptId,
400
+ result.egoModel || null,
401
+ result.superegoModel || null,
286
402
  JSON.stringify(result.suggestions || []),
287
403
  result.rawResponse,
288
404
  result.latencyMs,
@@ -299,15 +415,23 @@ export function storeResult(runId, result) {
299
415
  result.scores?.actionability,
300
416
  result.scores?.tone,
301
417
  result.overallScore,
418
+ result.baseScore,
419
+ result.recognitionScore,
302
420
  result.passesRequired ? 1 : 0,
303
421
  result.passesForbidden ? 1 : 0,
304
422
  JSON.stringify(result.requiredMissing || []),
305
423
  JSON.stringify(result.forbiddenFound || []),
306
- result.evaluatorModel,
424
+ result.judgeModel,
307
425
  result.evaluationReasoning,
308
426
  result.scoresWithReasoning ? JSON.stringify(result.scoresWithReasoning) : null,
309
427
  result.success ? 1 : 0,
310
- result.errorMessage
428
+ result.errorMessage,
429
+ result.factors?.recognition != null ? (result.factors.recognition ? 1 : 0) : null,
430
+ result.factors?.multi_agent_tutor != null ? (result.factors.multi_agent_tutor ? 1 : 0) : null,
431
+ result.factors?.multi_agent_learner != null ? (result.factors.multi_agent_learner ? 1 : 0) : null,
432
+ result.learnerArchitecture || null,
433
+ result.scoringMethod || null,
434
+ new Date().toISOString()
311
435
  );
312
436
 
313
437
  return info.lastInsertRowid;
@@ -331,6 +455,8 @@ export function getRun(runId) {
331
455
  status: row.status,
332
456
  completedAt: row.completed_at,
333
457
  metadata: JSON.parse(row.metadata || '{}'),
458
+ gitCommit: row.git_commit,
459
+ packageVersion: row.package_version,
334
460
  };
335
461
  }
336
462
 
@@ -338,7 +464,7 @@ export function getRun(runId) {
338
464
  * List all runs with scenario names
339
465
  */
340
466
  export function listRuns(options = {}) {
341
- const { limit = 20, status = null } = options;
467
+ const { limit = null, status = null } = options;
342
468
 
343
469
  let query = 'SELECT * FROM evaluation_runs';
344
470
  const params = [];
@@ -348,8 +474,11 @@ export function listRuns(options = {}) {
348
474
  params.push(status);
349
475
  }
350
476
 
351
- query += ' ORDER BY created_at DESC LIMIT ?';
352
- params.push(limit);
477
+ query += ' ORDER BY created_at ASC';
478
+ if (limit) {
479
+ query += ' LIMIT ?';
480
+ params.push(limit);
481
+ }
353
482
 
354
483
  const stmt = db.prepare(query);
355
484
  const rows = stmt.all(...params);
@@ -361,9 +490,59 @@ export function listRuns(options = {}) {
361
490
  ORDER BY scenario_name
362
491
  `);
363
492
 
493
+ // Count completed results per run
494
+ const resultCountStmt = db.prepare(`
495
+ SELECT COUNT(*) as completed,
496
+ SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successful,
497
+ AVG(overall_score) as avg_score
498
+ FROM evaluation_results WHERE run_id = ?
499
+ `);
500
+
501
+ // Get distinct ego + superego models for each run
502
+ const modelStmt = db.prepare(`
503
+ SELECT DISTINCT ego_model FROM evaluation_results
504
+ WHERE run_id = ? AND ego_model IS NOT NULL
505
+ ORDER BY ego_model
506
+ `);
507
+ const superegoModelStmt = db.prepare(`
508
+ SELECT DISTINCT superego_model FROM evaluation_results
509
+ WHERE run_id = ? AND superego_model IS NOT NULL
510
+ ORDER BY superego_model
511
+ `);
512
+
364
513
  return rows.map(row => {
365
514
  const scenarioRows = scenarioStmt.all(row.id);
366
515
  const scenarioNames = scenarioRows.map(s => s.scenario_name).filter(Boolean);
516
+ const counts = resultCountStmt.get(row.id);
517
+
518
+ const extractAlias = (raw) => {
519
+ if (!raw) return null;
520
+ const dotIdx = raw.indexOf('.');
521
+ return dotIdx !== -1 ? raw.slice(dotIdx + 1) : raw;
522
+ };
523
+
524
+ const modelRows = modelStmt.all(row.id);
525
+ const superegoRows = superegoModelStmt.all(row.id);
526
+ const models = [...new Set([
527
+ ...modelRows.map(m => extractAlias(m.ego_model)),
528
+ ...superegoRows.map(m => extractAlias(m.superego_model)),
529
+ ].filter(Boolean))];
530
+
531
+ const completedResults = counts?.completed || 0;
532
+ const totalTests = row.total_tests || 0;
533
+ const progressPct = totalTests > 0 ? Math.min(100, Math.round((completedResults / totalTests) * 100)) : null;
534
+
535
+ // Compute duration: for completed runs use completed_at - created_at;
536
+ // for running runs compute elapsed from now.
537
+ let durationMs = null;
538
+ if (row.created_at) {
539
+ const start = new Date(row.created_at).getTime();
540
+ if (row.completed_at) {
541
+ durationMs = new Date(row.completed_at).getTime() - start;
542
+ } else if (row.status === 'running') {
543
+ durationMs = Date.now() - start;
544
+ }
545
+ }
367
546
 
368
547
  return {
369
548
  id: row.id,
@@ -371,10 +550,16 @@ export function listRuns(options = {}) {
371
550
  description: row.description,
372
551
  totalScenarios: row.total_scenarios,
373
552
  totalConfigurations: row.total_configurations,
374
- totalTests: row.total_tests,
553
+ totalTests,
554
+ completedResults,
555
+ successfulResults: counts?.successful || 0,
556
+ avgScore: counts?.avg_score || null,
557
+ progressPct,
558
+ durationMs,
375
559
  status: row.status,
376
560
  completedAt: row.completed_at,
377
561
  scenarioNames, // Scenario names from results
562
+ models, // Distinct ego model aliases used
378
563
  metadata: JSON.parse(row.metadata || '{}'), // Structured metadata
379
564
  };
380
565
  });
@@ -384,7 +569,7 @@ export function listRuns(options = {}) {
384
569
  * Get results for a run
385
570
  */
386
571
  export function getResults(runId, options = {}) {
387
- const { scenarioId = null, provider = null, model = null } = options;
572
+ const { scenarioId = null, provider = null, model = null, profileName = null } = options;
388
573
 
389
574
  let query = 'SELECT * FROM evaluation_results WHERE run_id = ?';
390
575
  const params = [runId];
@@ -404,6 +589,11 @@ export function getResults(runId, options = {}) {
404
589
  params.push(model);
405
590
  }
406
591
 
592
+ if (profileName) {
593
+ query += ' AND profile_name = ?';
594
+ params.push(profileName);
595
+ }
596
+
407
597
  query += ' ORDER BY created_at';
408
598
 
409
599
  const stmt = db.prepare(query);
@@ -420,6 +610,9 @@ export function getRunStats(runId) {
420
610
  SELECT
421
611
  provider,
422
612
  model,
613
+ profile_name,
614
+ ego_model,
615
+ superego_model,
423
616
  COUNT(*) as total_tests,
424
617
  SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) as successful_tests,
425
618
  AVG(overall_score) as avg_score,
@@ -429,6 +622,8 @@ export function getRunStats(runId) {
429
622
  AVG(score_personalization) as avg_personalization,
430
623
  AVG(score_actionability) as avg_actionability,
431
624
  AVG(score_tone) as avg_tone,
625
+ AVG(base_score) as avg_base_score,
626
+ AVG(recognition_score) as avg_recognition_score,
432
627
  AVG(latency_ms) as avg_latency,
433
628
  SUM(input_tokens) as total_input_tokens,
434
629
  SUM(output_tokens) as total_output_tokens,
@@ -436,7 +631,7 @@ export function getRunStats(runId) {
436
631
  SUM(CASE WHEN passes_forbidden = 1 THEN 1 ELSE 0 END) as passes_forbidden
437
632
  FROM evaluation_results
438
633
  WHERE run_id = ?
439
- GROUP BY provider, model
634
+ GROUP BY provider, model, profile_name
440
635
  ORDER BY avg_score DESC
441
636
  `);
442
637
 
@@ -445,10 +640,15 @@ export function getRunStats(runId) {
445
640
  return rows.map(row => ({
446
641
  provider: row.provider,
447
642
  model: row.model,
643
+ profileName: row.profile_name,
644
+ egoModel: row.ego_model,
645
+ superegoModel: row.superego_model,
448
646
  totalTests: row.total_tests,
449
647
  successfulTests: row.successful_tests,
450
648
  successRate: row.total_tests > 0 ? row.successful_tests / row.total_tests : 0,
451
649
  avgScore: row.avg_score,
650
+ avgBaseScore: row.avg_base_score,
651
+ avgRecognitionScore: row.avg_recognition_score,
452
652
  dimensions: {
453
653
  relevance: row.avg_relevance,
454
654
  specificity: row.avg_specificity,
@@ -478,13 +678,18 @@ export function getScenarioStats(runId) {
478
678
  scenario_name,
479
679
  provider,
480
680
  model,
681
+ profile_name,
682
+ ego_model,
683
+ superego_model,
481
684
  AVG(overall_score) as avg_score,
685
+ AVG(base_score) as avg_base_score,
686
+ AVG(recognition_score) as avg_recognition_score,
482
687
  AVG(latency_ms) as avg_latency,
483
688
  SUM(CASE WHEN passes_required = 1 AND passes_forbidden = 1 THEN 1 ELSE 0 END) as passes_validation,
484
689
  COUNT(*) as runs
485
690
  FROM evaluation_results
486
691
  WHERE run_id = ?
487
- GROUP BY scenario_id, provider, model
692
+ GROUP BY scenario_id, provider, model, profile_name
488
693
  ORDER BY scenario_id, avg_score DESC
489
694
  `);
490
695
 
@@ -503,7 +708,12 @@ export function getScenarioStats(runId) {
503
708
  grouped[row.scenario_id].configurations.push({
504
709
  provider: row.provider,
505
710
  model: row.model,
711
+ profileName: row.profile_name,
712
+ egoModel: row.ego_model,
713
+ superegoModel: row.superego_model,
506
714
  avgScore: row.avg_score,
715
+ avgBaseScore: row.avg_base_score,
716
+ avgRecognitionScore: row.avg_recognition_score,
507
717
  avgLatencyMs: row.avg_latency,
508
718
  passesValidation: row.passes_validation === row.runs,
509
719
  runs: row.runs,
@@ -623,7 +833,16 @@ export function exportToCsv(runId) {
623
833
  r.success ? 1 : 0,
624
834
  ]);
625
835
 
626
- return [headers.join(','), ...rows.map(row => row.join(','))].join('\n');
836
+ const escapeCsvField = (value) => {
837
+ if (value == null) return '';
838
+ const str = String(value);
839
+ if (str.includes(',') || str.includes('"') || str.includes('\n')) {
840
+ return '"' + str.replace(/"/g, '""') + '"';
841
+ }
842
+ return str;
843
+ };
844
+
845
+ return [headers.join(','), ...rows.map(row => row.map(escapeCsvField).join(','))].join('\n');
627
846
  }
628
847
 
629
848
  /**
@@ -734,6 +953,8 @@ export function findIncompleteRuns(options = {}) {
734
953
  return rows.map(row => {
735
954
  const resultsStmt = db.prepare('SELECT COUNT(*) as count FROM evaluation_results WHERE run_id = ?');
736
955
  const resultsCount = resultsStmt.get(row.id).count;
956
+ const metadata = JSON.parse(row.metadata || '{}');
957
+ const pid = metadata?.pid;
737
958
 
738
959
  return {
739
960
  id: row.id,
@@ -744,7 +965,9 @@ export function findIncompleteRuns(options = {}) {
744
965
  expectedTests: row.total_scenarios * row.total_configurations,
745
966
  resultsFound: resultsCount,
746
967
  ageMinutes: Math.round((Date.now() - new Date(row.created_at).getTime()) / 60000),
747
- metadata: JSON.parse(row.metadata || '{}'),
968
+ metadata,
969
+ pid,
970
+ pidAlive: isPidAlive(pid),
748
971
  };
749
972
  });
750
973
  }
@@ -762,16 +985,28 @@ export function autoCompleteStaleRuns(options = {}) {
762
985
 
763
986
  const incompleteRuns = findIncompleteRuns({ olderThanMinutes });
764
987
 
988
+ // Filter out runs whose PID is still alive
989
+ const staleRuns = incompleteRuns.filter(run => {
990
+ const pid = run.metadata?.pid;
991
+ const isAlive = isPidAlive(pid);
992
+ if (isAlive) {
993
+ console.log(` Skipping ${run.id}: pid ${pid} still running`);
994
+ }
995
+ return !isAlive;
996
+ });
997
+
765
998
  if (dryRun) {
766
999
  return {
767
1000
  dryRun: true,
768
1001
  found: incompleteRuns.length,
769
- runs: incompleteRuns,
1002
+ stale: staleRuns.length,
1003
+ skippedAlive: incompleteRuns.length - staleRuns.length,
1004
+ runs: staleRuns,
770
1005
  };
771
1006
  }
772
1007
 
773
1008
  const completed = [];
774
- for (const run of incompleteRuns) {
1009
+ for (const run of staleRuns) {
775
1010
  try {
776
1011
  const result = completeRun(run.id);
777
1012
  completed.push(result);
@@ -786,6 +1021,8 @@ export function autoCompleteStaleRuns(options = {}) {
786
1021
 
787
1022
  return {
788
1023
  found: incompleteRuns.length,
1024
+ stale: staleRuns.length,
1025
+ skippedAlive: incompleteRuns.length - staleRuns.length,
789
1026
  completed: completed.length,
790
1027
  runs: completed,
791
1028
  };
@@ -827,8 +1064,9 @@ export function getIncompleteTests(runId, profiles, scenarios) {
827
1064
  const results = getResults(runId);
828
1065
  const completedSet = new Set();
829
1066
 
830
- // Build set of completed (profile, scenarioId) pairs
1067
+ // Build set of completed (profile, scenarioId) pairs — only count successes
831
1068
  for (const result of results) {
1069
+ if (result.success === false || result.success === 0) continue;
832
1070
  const key = `${result.profileName}:${result.scenarioId}`;
833
1071
  completedSet.add(key);
834
1072
  }
@@ -899,9 +1137,12 @@ function parseResultRow(row) {
899
1137
  runId: row.run_id,
900
1138
  scenarioId: row.scenario_id,
901
1139
  scenarioName: row.scenario_name,
1140
+ scenarioType: row.scenario_type || 'suggestion',
902
1141
  provider: row.provider,
903
1142
  model: row.model,
904
1143
  profileName: row.profile_name,
1144
+ egoModel: row.ego_model,
1145
+ superegoModel: row.superego_model,
905
1146
  hyperparameters: JSON.parse(row.hyperparameters || '{}'),
906
1147
  promptId: row.prompt_id,
907
1148
  suggestions: JSON.parse(row.suggestions || '[]'),
@@ -914,15 +1155,29 @@ function parseResultRow(row) {
914
1155
  dialogueId: row.dialogue_id,
915
1156
  scores,
916
1157
  overallScore: row.overall_score,
1158
+ scoringMethod: row.scoring_method || null,
1159
+ baseScore: row.base_score,
1160
+ recognitionScore: row.recognition_score,
917
1161
  passesRequired: Boolean(row.passes_required),
918
1162
  passesForbidden: Boolean(row.passes_forbidden),
919
1163
  requiredMissing: JSON.parse(row.required_missing || '[]'),
920
1164
  forbiddenFound: JSON.parse(row.forbidden_found || '[]'),
921
- evaluatorModel: row.evaluator_model,
1165
+ judgeModel: row.judge_model,
922
1166
  evaluationReasoning: row.evaluation_reasoning,
923
1167
  success: Boolean(row.success),
924
1168
  errorMessage: row.error_message,
925
1169
  createdAt: row.created_at,
1170
+ factors: (row.factor_recognition != null || row.factor_multi_agent_tutor != null || row.factor_multi_agent_learner != null)
1171
+ ? {
1172
+ recognition: Boolean(row.factor_recognition),
1173
+ multi_agent_tutor: Boolean(row.factor_multi_agent_tutor),
1174
+ multi_agent_learner: Boolean(row.factor_multi_agent_learner),
1175
+ }
1176
+ : null,
1177
+ learnerArchitecture: row.learner_architecture || null,
1178
+ learnerScores: row.learner_scores ? JSON.parse(row.learner_scores) : null,
1179
+ learnerOverallScore: row.learner_overall_score != null ? row.learner_overall_score : null,
1180
+ learnerJudgeModel: row.learner_judge_model || null,
926
1181
  };
927
1182
  }
928
1183
 
@@ -1052,6 +1307,9 @@ export function getInteractionEval(evalId) {
1052
1307
  uniqueOutcomes: JSON.parse(row.unique_outcomes || '[]'),
1053
1308
  judgeOverallScore: row.judge_overall_score,
1054
1309
  judgeEvaluation: JSON.parse(row.judge_evaluation || 'null'),
1310
+ learnerScores: JSON.parse(row.learner_scores || 'null'),
1311
+ learnerOverallScore: row.learner_overall_score,
1312
+ learnerJudgeModel: row.learner_judge_model,
1055
1313
  createdAt: row.created_at,
1056
1314
  };
1057
1315
  }
@@ -1096,10 +1354,284 @@ export function getInteractionEvalByRunId(runId) {
1096
1354
  };
1097
1355
  }
1098
1356
 
1357
/**
 * Get factorial cell data for ANOVA analysis.
 *
 * Groups scores by cell key ("r0_t0_l0", etc.). Only successful results
 * that carry factor tags and a non-null score are included.
 *
 * @param {string} runId - The run ID
 * @param {Object} [options] - Options
 * @param {string} [options.scoreColumn='overall_score'] - Which score to use
 * @returns {Object} Map of cellKey → [score, ...]
 */
export function getFactorialCellData(runId, options = {}) {
  const requested = options.scoreColumn ?? 'overall_score';

  // Whitelist valid score columns to prevent SQL injection
  const allowed = new Set(['overall_score', 'base_score', 'recognition_score']);
  const column = allowed.has(requested) ? requested : 'overall_score';

  const rows = db.prepare(`
    SELECT factor_recognition, factor_multi_agent_tutor, factor_multi_agent_learner, ${column} as score
    FROM evaluation_results
    WHERE run_id = ? AND factor_recognition IS NOT NULL AND ${column} IS NOT NULL AND success = 1
  `).all(runId);

  // Fold rows into { cellKey: [score, ...] }
  return rows.reduce((cells, row) => {
    const key = `r${row.factor_recognition}_t${row.factor_multi_agent_tutor}_l${row.factor_multi_agent_learner}`;
    (cells[key] ??= []).push(row.score);
    return cells;
  }, {});
}
1392
+
1393
/**
 * Store a new judgment row for an existing result (preserves judgment history).
 * Copies the original result's response data but adds new scores from a different judge.
 * This enables inter-judge reliability analysis.
 *
 * The VALUES placeholder groups mirror the column list line-for-line (44
 * positions total); keep both in sync when adding columns.
 *
 * @param {Object} originalResult - The original result row (from getResults,
 *   camelCase properties)
 * @param {Object} evaluation - The new evaluation scores; each entry of
 *   `evaluation.scores` may be a bare number or an object with a `.score` field
 * @returns {number} The new row ID (sqlite lastInsertRowid)
 */
export function storeRejudgment(originalResult, evaluation) {
  const stmt = db.prepare(`
    INSERT INTO evaluation_results (
      run_id, scenario_id, scenario_name, scenario_type,
      provider, model, profile_name, hyperparameters, prompt_id,
      ego_model, superego_model,
      suggestions, raw_response,
      latency_ms, input_tokens, output_tokens, cost, dialogue_rounds, api_calls, dialogue_id,
      score_relevance, score_specificity, score_pedagogical,
      score_personalization, score_actionability, score_tone, overall_score,
      base_score, recognition_score,
      passes_required, passes_forbidden, required_missing, forbidden_found,
      judge_model, evaluation_reasoning, scores_with_reasoning, success, error_message,
      factor_recognition, factor_multi_agent_tutor, factor_multi_agent_learner, learner_architecture,
      scoring_method,
      created_at
    ) VALUES (
      ?, ?, ?, ?,
      ?, ?, ?, ?, ?,
      ?, ?,
      ?, ?,
      ?, ?, ?, ?, ?, ?, ?,
      ?, ?, ?,
      ?, ?, ?, ?,
      ?, ?,
      ?, ?, ?, ?,
      ?, ?, ?, ?, ?,
      ?, ?, ?, ?,
      ?,
      ?
    )
  `);

  const scores = evaluation.scores || {};

  const info = stmt.run(
    // --- Provenance and response data copied verbatim from the original row ---
    originalResult.runId,
    originalResult.scenarioId,
    originalResult.scenarioName,
    originalResult.scenarioType || 'suggestion',
    originalResult.provider,
    originalResult.model,
    originalResult.profileName,
    // hyperparameters/suggestions may already be serialized JSON strings
    typeof originalResult.hyperparameters === 'string'
      ? originalResult.hyperparameters
      : JSON.stringify(originalResult.hyperparameters || {}),
    originalResult.promptId,
    originalResult.egoModel || null,
    originalResult.superegoModel || null,
    typeof originalResult.suggestions === 'string'
      ? originalResult.suggestions
      : JSON.stringify(originalResult.suggestions || []),
    originalResult.rawResponse,
    originalResult.latencyMs,
    originalResult.inputTokens,
    originalResult.outputTokens,
    originalResult.cost,
    originalResult.dialogueRounds,
    originalResult.apiCalls,
    originalResult.dialogueId,
    // New scores from the new judge
    scores.relevance?.score ?? scores.relevance ?? null,
    scores.specificity?.score ?? scores.specificity ?? null,
    scores.pedagogical?.score ?? scores.pedagogical ?? null,
    scores.personalization?.score ?? scores.personalization ?? null,
    scores.actionability?.score ?? scores.actionability ?? null,
    scores.tone?.score ?? scores.tone ?? null,
    evaluation.overallScore ?? null,
    evaluation.baseScore ?? null,
    evaluation.recognitionScore ?? null,
    evaluation.passesRequired ? 1 : 0,
    evaluation.passesForbidden ? 1 : 0,
    JSON.stringify(evaluation.requiredMissing || []),
    JSON.stringify(evaluation.forbiddenFound || []),
    evaluation.judgeModel || null,
    evaluation.summary || null,
    evaluation.scores ? JSON.stringify(evaluation.scores) : null,
    1, // success
    null, // error_message
    // Factor tags carried over so rejudged rows stay in the factorial design
    originalResult.factorRecognition ?? null,
    originalResult.factorMultiAgentTutor ?? null,
    originalResult.factorMultiAgentLearner ?? null,
    originalResult.learnerArchitecture || null,
    'rubric', // Rejudgments only store successful rubric evaluations
    new Date().toISOString()
  );

  return info.lastInsertRowid;
}
1491
+
1492
/**
 * Update score columns for an existing result row (for rejudging - overwrites history)
 * @deprecated Use storeRejudgment() to preserve judgment history for reliability analysis
 */
export function updateResultScores(resultId, evaluation) {
  const scores = evaluation.scores || {};

  // A dimension may be stored as a bare number or as { score, reasoning }.
  const dim = (name) => scores[name]?.score ?? scores[name] ?? null;

  db.prepare(`
    UPDATE evaluation_results SET
      score_relevance = ?,
      score_specificity = ?,
      score_pedagogical = ?,
      score_personalization = ?,
      score_actionability = ?,
      score_tone = ?,
      overall_score = ?,
      base_score = ?,
      recognition_score = ?,
      passes_required = ?,
      passes_forbidden = ?,
      required_missing = ?,
      forbidden_found = ?,
      judge_model = ?,
      evaluation_reasoning = ?,
      scores_with_reasoning = ?,
      scoring_method = ?
    WHERE id = ?
  `).run(
    dim('relevance'),
    dim('specificity'),
    dim('pedagogical'),
    dim('personalization'),
    dim('actionability'),
    dim('tone'),
    evaluation.overallScore ?? null,
    evaluation.baseScore ?? null,
    evaluation.recognitionScore ?? null,
    evaluation.passesRequired ? 1 : 0,
    evaluation.passesForbidden ? 1 : 0,
    JSON.stringify(evaluation.requiredMissing || []),
    JSON.stringify(evaluation.forbiddenFound || []),
    evaluation.judgeModel || null,
    evaluation.summary || null,
    evaluation.scores ? JSON.stringify(evaluation.scores) : null,
    'rubric', // Only called on successful evaluations
    resultId
  );
}
1541
+
1542
/**
 * Update learner-side evaluation scores on an evaluation_results row.
 *
 * @param {string} resultId - The evaluation result ID
 * @param {Object} evaluation - Learner evaluation data
 * @param {Object} evaluation.scores - Per-turn learner scores (JSON-serializable)
 * @param {number} evaluation.overallScore - Weighted average learner score (0-100)
 * @param {string} evaluation.judgeModel - Model used for judging
 */
export function updateResultLearnerScores(resultId, evaluation) {
  const { scores, overallScore, judgeModel } = evaluation;

  db.prepare(`
    UPDATE evaluation_results SET
      learner_scores = ?,
      learner_overall_score = ?,
      learner_judge_model = ?
    WHERE id = ?
  `).run(JSON.stringify(scores), overallScore, judgeModel || null, resultId);
}
1567
+
1568
/**
 * List all interaction evaluations for a given run ID.
 *
 * @param {string} runId - The run ID
 * @returns {Array} Array of interaction evaluation objects, ordered by created_at
 */
export function listInteractionEvalsByRunId(runId) {
  // Convert one snake_case DB row into the camelCase shape callers expect.
  const toEval = (row) => ({
    evalId: row.id,
    runId: row.run_id,
    scenarioId: row.scenario_id,
    scenarioName: row.scenario_name,
    evalType: row.eval_type,
    learnerProfile: row.learner_profile,
    tutorProfile: row.tutor_profile,
    personaId: row.persona_id,
    learnerAgents: JSON.parse(row.learner_agents || '[]'),
    turnCount: row.turn_count,
    turns: JSON.parse(row.turns || '[]'),
    formattedTranscript: row.formatted_transcript,
    totalTokens: row.total_tokens,
    finalLearnerState: row.final_learner_state,
    finalUnderstanding: row.final_understanding,
    judgeOverallScore: row.judge_overall_score,
    learnerScores: JSON.parse(row.learner_scores || 'null'),
    learnerOverallScore: row.learner_overall_score,
    learnerJudgeModel: row.learner_judge_model,
    createdAt: row.created_at,
  });

  return db
    .prepare('SELECT * FROM interaction_evaluations WHERE run_id = ? ORDER BY created_at')
    .all(runId)
    .map(toEval);
}
1601
+
1602
/**
 * Update learner-side evaluation scores for an interaction evaluation.
 *
 * @param {string} evalId - The interaction evaluation ID
 * @param {Object} evaluation - Learner evaluation data
 * @param {Object} evaluation.scores - Per-turn scores: { turnIndex: { dimension: {score, reasoning} } }
 * @param {number} evaluation.overallScore - Weighted average learner score (0-100)
 * @param {string} evaluation.judgeModel - Model used for judging
 */
export function updateInteractionLearnerScores(evalId, evaluation) {
  const { scores, overallScore, judgeModel } = evaluation;

  db.prepare(`
    UPDATE interaction_evaluations
    SET learner_scores = ?,
        learner_overall_score = ?,
        learner_judge_model = ?
    WHERE id = ?
  `).run(JSON.stringify(scores), overallScore, judgeModel || null, evalId);
}
1627
+
1099
1628
  export default {
1100
1629
  createRun,
1101
1630
  updateRun,
1102
1631
  storeResult,
1632
+ storeRejudgment,
1633
+ updateResultScores,
1634
+ updateResultLearnerScores,
1103
1635
  getRun,
1104
1636
  listRuns,
1105
1637
  getResults,
@@ -1113,9 +1645,12 @@ export default {
1113
1645
  findIncompleteRuns,
1114
1646
  autoCompleteStaleRuns,
1115
1647
  getIncompleteTests,
1648
+ getFactorialCellData,
1116
1649
  // Interaction evaluations
1117
1650
  storeInteractionEval,
1118
1651
  listInteractionEvals,
1652
+ listInteractionEvalsByRunId,
1119
1653
  getInteractionEval,
1120
1654
  getInteractionEvalByRunId,
1655
+ updateInteractionLearnerScores,
1121
1656
  };