@machinespirits/eval 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/LICENSE +21 -0
  2. package/README.md +161 -0
  3. package/config/eval-settings.yaml +18 -0
  4. package/config/evaluation-rubric-learner.yaml +277 -0
  5. package/config/evaluation-rubric.yaml +613 -0
  6. package/config/interaction-eval-scenarios.yaml +93 -50
  7. package/config/learner-agents.yaml +124 -193
  8. package/config/machinespirits-eval.code-workspace +11 -0
  9. package/config/providers.yaml +60 -0
  10. package/config/suggestion-scenarios.yaml +1399 -0
  11. package/config/tutor-agents.yaml +716 -0
  12. package/docs/EVALUATION-VARIABLES.md +589 -0
  13. package/docs/REPLICATION-PLAN.md +577 -0
  14. package/index.js +15 -6
  15. package/package.json +16 -22
  16. package/routes/evalRoutes.js +88 -36
  17. package/scripts/analyze-judge-reliability.js +401 -0
  18. package/scripts/analyze-run.js +97 -0
  19. package/scripts/analyze-run.mjs +282 -0
  20. package/scripts/analyze-validation-failures.js +141 -0
  21. package/scripts/check-run.mjs +17 -0
  22. package/scripts/code-impasse-strategies.js +1132 -0
  23. package/scripts/compare-runs.js +44 -0
  24. package/scripts/compare-suggestions.js +80 -0
  25. package/scripts/compare-transformation.js +116 -0
  26. package/scripts/dig-into-run.js +158 -0
  27. package/scripts/eval-cli.js +2626 -0
  28. package/scripts/generate-paper-figures.py +452 -0
  29. package/scripts/qualitative-analysis-ai.js +1313 -0
  30. package/scripts/qualitative-analysis.js +688 -0
  31. package/scripts/seed-db.js +87 -0
  32. package/scripts/show-failed-suggestions.js +64 -0
  33. package/scripts/validate-content.js +192 -0
  34. package/server.js +3 -2
  35. package/services/__tests__/evalConfigLoader.test.js +338 -0
  36. package/services/anovaStats.js +499 -0
  37. package/services/contentResolver.js +407 -0
  38. package/services/dialogueTraceAnalyzer.js +454 -0
  39. package/services/evalConfigLoader.js +625 -0
  40. package/services/evaluationRunner.js +2171 -270
  41. package/services/evaluationStore.js +564 -29
  42. package/services/learnerConfigLoader.js +75 -5
  43. package/services/learnerRubricEvaluator.js +284 -0
  44. package/services/learnerTutorInteractionEngine.js +375 -0
  45. package/services/processUtils.js +18 -0
  46. package/services/progressLogger.js +98 -0
  47. package/services/promptRecommendationService.js +31 -26
  48. package/services/promptRewriter.js +427 -0
  49. package/services/rubricEvaluator.js +543 -70
  50. package/services/streamingReporter.js +104 -0
  51. package/services/turnComparisonAnalyzer.js +494 -0
  52. package/components/MobileEvalDashboard.tsx +0 -267
  53. package/components/comparison/DeltaAnalysisTable.tsx +0 -137
  54. package/components/comparison/ProfileComparisonCard.tsx +0 -176
  55. package/components/comparison/RecognitionABMode.tsx +0 -385
  56. package/components/comparison/RecognitionMetricsPanel.tsx +0 -135
  57. package/components/comparison/WinnerIndicator.tsx +0 -64
  58. package/components/comparison/index.ts +0 -5
  59. package/components/mobile/BottomSheet.tsx +0 -233
  60. package/components/mobile/DimensionBreakdown.tsx +0 -210
  61. package/components/mobile/DocsView.tsx +0 -363
  62. package/components/mobile/LogsView.tsx +0 -481
  63. package/components/mobile/PsychodynamicQuadrant.tsx +0 -261
  64. package/components/mobile/QuickTestView.tsx +0 -1098
  65. package/components/mobile/RecognitionTypeChart.tsx +0 -124
  66. package/components/mobile/RecognitionView.tsx +0 -809
  67. package/components/mobile/RunDetailView.tsx +0 -261
  68. package/components/mobile/RunHistoryView.tsx +0 -367
  69. package/components/mobile/ScoreRadial.tsx +0 -211
  70. package/components/mobile/StreamingLogPanel.tsx +0 -230
  71. package/components/mobile/SynthesisStrategyChart.tsx +0 -140
  72. package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +0 -52
  73. package/docs/research/ABLATION-MODEL-SELECTION.md +0 -53
  74. package/docs/research/ADVANCED-EVAL-ANALYSIS.md +0 -60
  75. package/docs/research/ANOVA-RESULTS-2026-01-14.md +0 -257
  76. package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +0 -586
  77. package/docs/research/COST-ANALYSIS.md +0 -56
  78. package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +0 -340
  79. package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +0 -291
  80. package/docs/research/EVAL-SYSTEM-ANALYSIS.md +0 -306
  81. package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +0 -301
  82. package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +0 -1988
  83. package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +0 -282
  84. package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +0 -147
  85. package/docs/research/PAPER-EXTENSION-DYADIC.md +0 -204
  86. package/docs/research/PAPER-UNIFIED.md +0 -659
  87. package/docs/research/PAPER-UNIFIED.pdf +0 -0
  88. package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +0 -356
  89. package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +0 -419
  90. package/docs/research/apa.csl +0 -2133
  91. package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +0 -1637
  92. package/docs/research/archive/paper-multiagent-tutor.tex +0 -978
  93. package/docs/research/paper-draft/full-paper.md +0 -136
  94. package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
  95. package/docs/research/paper-draft/references.bib +0 -515
  96. package/docs/research/transcript-baseline.md +0 -139
  97. package/docs/research/transcript-recognition-multiagent.md +0 -187
  98. package/hooks/useEvalData.ts +0 -625
  99. package/server-init.js +0 -45
  100. package/services/benchmarkService.js +0 -1892
  101. package/types.ts +0 -165
  102. package/utils/haptics.ts +0 -45
@@ -0,0 +1,716 @@
1
+ # Local Tutor Agent Configuration (Eval Repo Override)
2
+ #
3
+ # This file overrides tutor-core's tutor-agents.yaml for evaluation purposes.
4
+ # Model aliases (e.g. "sonnet", "nemotron") are resolved through providers.yaml.
5
+ #
6
+ # ============================================================================
7
+ # MODEL OVERRIDES (optional)
8
+ # ============================================================================
9
+ # These override ALL profile models when uncommented. Useful for quick testing.
10
+ # CLI flags (--model, --ego-model, --superego-model) take precedence over these.
11
+ #
12
+ # model_override: openrouter.haiku # Override ALL models (ego + superego)
13
+ # ego_model_override: openrouter.nemotron # Override only ego model
14
+ # superego_model_override: openrouter.kimi-k2.5 # Override only superego model
15
+ #
16
+ # ============================================================================
17
+ # FACTORIAL EVALUATION DESIGN
18
+ # ============================================================================
19
+ #
20
+ # Three independent variables:
21
+ #
22
+ # Factor A: Prompt Type — base / enhanced / recognition (3 levels)
23
+ # Factor B: Tutor Architecture — single ego vs ego+superego dialogue (2 levels)
24
+ # Factor C: Learner Architecture — unified vs ego_superego (2 levels)
25
+ #
26
+ # Factor A levels:
27
+ # - base: Original Claude-generated prompts (default LLM tutoring)
28
+ # - enhanced: Matches recognition specificity without recognition theory
29
+ # - recognition: Full recognition-enhanced prompts with Hegelian theory
30
+ # - hardwired: Base + 5 superego-derived rules (no live superego) [ABLATION]
31
+ #
32
+ # This design enables isolating:
33
+ # - Base vs Enhanced = prompt engineering effect
34
+ # - Enhanced vs Recognition = isolated recognition theory effect
35
+ # - Base vs Recognition = combined effect (original comparison)
36
+ #
37
+ # Most cells use nemotron (free tier) to isolate architectural effects from model
+ # cost; note cells 6 and 8 configure kimi-k2.5 as the ego model — confirm intended.
38
+ #
39
+ # Design Matrix (12 main cells + 2 ablation cells; additional control cells
+ # 15-21 — placebo, memory isolation, and prompt rewriting — are defined below):
40
+ #
41
+ # ┌────────────────────┬──────────────────────────┬──────────────────────────┐
42
+ # │ │ Unified Learner │ Ego/Superego Learner │
43
+ # ├────────────────────┼──────────────┬───────────┼──────────────┬───────────┤
44
+ # │ │ Single Tutor │ E+S Tutor │ Single Tutor │ E+S Tutor │
45
+ # ├────────────────────┼──────────────┼───────────┼──────────────┼───────────┤
46
+ # │ Base │ cell_1 │ cell_3 │ cell_2 │ cell_4 │
47
+ # ├────────────────────┼──────────────┼───────────┼──────────────┼───────────┤
48
+ # │ Enhanced │ cell_9 │ cell_11 │ cell_10 │ cell_12 │
49
+ # ├────────────────────┼──────────────┼───────────┼──────────────┼───────────┤
50
+ # │ Recognition │ cell_5 │ cell_7 │ cell_6 │ cell_8 │
51
+ # └────────────────────┴──────────────┴───────────┴──────────────┴───────────┘
52
+ #
53
+ # ABLATION (hardwired = superego rules in ego prompt, no live superego):
54
+ # ┌────────────────────┬──────────────┬──────────────┐
55
+ # │ Hardwired │ cell_13 │ cell_14 │
56
+ # └────────────────────┴──────────────┴──────────────┘
57
+ #
58
+ # Ablation comparisons:
59
+ # - cell_1/2 vs cell_13/14 = Do hardwired rules improve single-agent?
60
+ # - cell_3/4 vs cell_13/14 = Can hardwired rules replace live superego?
61
+ #
62
+ # This allows analyzing:
63
+ # - Main effect of prompt type (base vs enhanced vs recognition)
64
+ # - Main effect of tutor architecture (single agent vs ego + superego dialogue)
65
+ # - Main effect of learner architecture (unified vs ego_superego)
66
+ # - All two-way and three-way interactions
67
+ # - Superego architecture ablation (rules vs dynamic dialogue)
68
+ #
69
+ # ============================================================================
70
+
71
+ active_profile: budget
72
+
73
+ profiles:
74
+ # ===========================================================================
75
+ # DEVELOPMENT PROFILE (not part of factorial design)
76
+ # ===========================================================================
77
+
78
+ budget:
79
+ description: "Budget-friendly single agent - Nemotron (free) via OpenRouter, no dialogue"
80
+ dialogue:
81
+ enabled: false
82
+ max_rounds: 0
83
+ ego:
84
+ provider: openrouter
85
+ model: nemotron
86
+ prompt_file: tutor-ego.md
87
+ hyperparameters:
88
+ temperature: 0.6
89
+ max_tokens: 8000
90
+ superego: null
91
+
92
+ # ===========================================================================
93
+ # 2×2×2 FACTORIAL CELLS (Recognition × Tutor Architecture × Learner Architecture)
94
+ # ===========================================================================
95
+ # These 8 profiles support the unified factorial evaluation pipeline.
96
+ # Use: node scripts/eval-cli.js run --factorial --runs 3
97
+ # Most use nemotron (free tier) to isolate architectural effects from model cost
+ # (exception: cells 6 and 8 use kimi-k2.5 for the ego — verify this is intended).
98
+
99
+ # Cell 1: Base × Single Agent × Unified Learner
100
+ cell_1_base_single_unified:
101
+ description: "Factorial cell 1: baseline single-agent tutor, unified learner"
102
+ factors:
103
+ prompt_type: base
104
+ multi_agent_tutor: false
105
+ multi_agent_learner: false
106
+ learner_architecture: unified
107
+ recognition_mode: false
108
+ memory_enabled: false
109
+ dialogue:
110
+ enabled: false
111
+ max_rounds: 0
112
+ ego:
113
+ provider: openrouter
114
+ model: nemotron
115
+ prompt_file: tutor-ego.md
116
+ hyperparameters:
117
+ temperature: 0.6
118
+ max_tokens: 8000
119
+ superego: null
120
+
121
+ # Cell 2: Base × Single Agent × Ego/Superego Learner
122
+ cell_2_base_single_psycho:
123
+ description: "Factorial cell 2: baseline single-agent tutor, ego/superego learner"
124
+ factors:
125
+ prompt_type: base
126
+ multi_agent_tutor: false
127
+ multi_agent_learner: true
128
+ learner_architecture: ego_superego
129
+ recognition_mode: false
130
+ memory_enabled: false
131
+ dialogue:
132
+ enabled: false
133
+ max_rounds: 0
134
+ ego:
135
+ provider: openrouter
136
+ model: nemotron
137
+ prompt_file: tutor-ego.md
138
+ hyperparameters:
139
+ temperature: 0.6
140
+ max_tokens: 8000
141
+ superego: null
142
+
143
+ # Cell 3: Base × Multi-Agent Tutor × Unified Learner
144
+ cell_3_base_multi_unified:
145
+ description: "Factorial cell 3: ego+superego tutor, unified learner"
146
+ factors:
147
+ prompt_type: base
148
+ multi_agent_tutor: true
149
+ multi_agent_learner: false
150
+ learner_architecture: unified
151
+ recognition_mode: false
152
+ memory_enabled: false
153
+ dialogue:
154
+ enabled: true
155
+ max_rounds: 2
156
+ convergence_threshold: 0.8
157
+ ego:
158
+ provider: openrouter
159
+ model: nemotron
160
+ staging: front
161
+ prompt_file: tutor-ego.md
162
+ hyperparameters:
163
+ temperature: 0.6
164
+ max_tokens: 8000
165
+ superego:
166
+ provider: openrouter
167
+ model: kimi-k2.5
168
+ staging: back
169
+ prompt_file: tutor-superego.md
170
+ hyperparameters:
171
+ temperature: 0.2
172
+ max_tokens: 8000
173
+
174
+ # Cell 4: Base × Multi-Agent Tutor × Ego/Superego Learner
175
+ cell_4_base_multi_psycho:
176
+ description: "Factorial cell 4: ego+superego tutor, ego/superego learner"
177
+ factors:
178
+ prompt_type: base
179
+ multi_agent_tutor: true
180
+ multi_agent_learner: true
181
+ learner_architecture: ego_superego
182
+ recognition_mode: false
183
+ memory_enabled: false
184
+ dialogue:
185
+ enabled: true
186
+ max_rounds: 2
187
+ convergence_threshold: 0.8
188
+ ego:
189
+ provider: openrouter
190
+ model: nemotron
191
+ staging: front
192
+ prompt_file: tutor-ego.md
193
+ hyperparameters:
194
+ temperature: 0.6
195
+ max_tokens: 8000
196
+ superego:
197
+ provider: openrouter
198
+ model: kimi-k2.5
199
+ staging: back
200
+ prompt_file: tutor-superego.md
201
+ hyperparameters:
202
+ temperature: 0.2
203
+ max_tokens: 8000
204
+
205
+ # Cell 5: Recognition × Single Agent × Unified Learner
206
+ cell_5_recog_single_unified:
207
+ description: "Factorial cell 5: recognition single-agent tutor, unified learner"
208
+ factors:
209
+ prompt_type: recognition
210
+ multi_agent_tutor: false
211
+ multi_agent_learner: false
212
+ learner_architecture: unified_recognition
213
+ recognition_mode: true
214
+ memory_enabled: true
215
+ dialogue:
216
+ enabled: false
217
+ max_rounds: 0
218
+ ego:
219
+ provider: openrouter
220
+ model: nemotron
221
+ prompt_file: tutor-ego-recognition.md
222
+ hyperparameters:
223
+ temperature: 0.6
224
+ max_tokens: 8000
225
+ superego: null
226
+
227
+ # Cell 6: Recognition × Single Agent × Ego/Superego Learner
228
+ cell_6_recog_single_psycho:
229
+ description: "Factorial cell 6: recognition single-agent tutor, ego/superego learner"
230
+ factors:
231
+ prompt_type: recognition
232
+ multi_agent_tutor: false
233
+ multi_agent_learner: true
234
+ learner_architecture: ego_superego_recognition
235
+ recognition_mode: true
236
+ memory_enabled: true
237
+ dialogue:
238
+ enabled: false
239
+ max_rounds: 0
240
+ ego:
241
+ provider: openrouter
242
+ model: kimi-k2.5
243
+ prompt_file: tutor-ego-recognition.md
244
+ hyperparameters:
245
+ temperature: 0.6
246
+ max_tokens: 8000
247
+ superego: null
248
+
249
+ # Cell 7: Recognition × Multi-Agent Tutor × Unified Learner
250
+ cell_7_recog_multi_unified:
251
+ description: "Factorial cell 7: recognition ego+superego tutor, unified learner"
252
+ factors:
253
+ prompt_type: recognition
254
+ multi_agent_tutor: true
255
+ multi_agent_learner: false
256
+ learner_architecture: unified_recognition
257
+ recognition_mode: true
258
+ memory_enabled: true
259
+ dialogue:
260
+ enabled: true
261
+ max_rounds: 2
262
+ convergence_threshold: 0.7
263
+ ego:
264
+ provider: openrouter
265
+ model: nemotron
266
+ staging: front
267
+ prompt_file: tutor-ego-recognition.md
268
+ hyperparameters:
269
+ temperature: 0.6
270
+ max_tokens: 8000
271
+ superego:
272
+ provider: openrouter
273
+ model: kimi-k2.5
274
+ staging: back
275
+ prompt_file: tutor-superego-recognition.md
276
+ hyperparameters:
277
+ temperature: 0.2
278
+ max_tokens: 8000
279
+ intervention_strategies:
280
+ enforce_mutual_recognition: true
281
+ require_memory_integration: true
282
+ assess_transformative_potential: true
283
+
284
+ # Cell 8: Recognition × Multi-Agent Tutor × Ego/Superego Learner
285
+ cell_8_recog_multi_psycho:
286
+ description: "Factorial cell 8: recognition ego+superego tutor, ego/superego learner"
287
+ factors:
288
+ prompt_type: recognition
289
+ multi_agent_tutor: true
290
+ multi_agent_learner: true
291
+ learner_architecture: ego_superego_recognition
292
+ recognition_mode: true
293
+ memory_enabled: true
294
+ dialogue:
295
+ enabled: true
296
+ max_rounds: 2
297
+ convergence_threshold: 0.7
298
+ ego:
299
+ provider: openrouter
300
+ model: kimi-k2.5
301
+ staging: front
302
+ prompt_file: tutor-ego-recognition.md
303
+ hyperparameters:
304
+ temperature: 0.6
305
+ max_tokens: 8000
306
+ superego:
307
+ provider: openrouter
308
+ model: kimi-k2.5
309
+ staging: back
310
+ prompt_file: tutor-superego-recognition.md
311
+ hyperparameters:
312
+ temperature: 0.2
313
+ max_tokens: 8000
314
+ intervention_strategies:
315
+ enforce_mutual_recognition: true
316
+ require_memory_integration: true
317
+ assess_transformative_potential: true
318
+
319
+ # ===========================================================================
320
+ # ENHANCED CELLS (9-12): Better prompting WITHOUT recognition theory
321
+ # ===========================================================================
322
+ # These cells use tutor-ego-enhanced.md and tutor-superego-enhanced.md which
323
+ # match the recognition prompts' specificity (8 rules, 10 examples, checklists)
324
+ # but WITHOUT Hegelian recognition theory language.
325
+ #
326
+ # Comparing Enhanced vs Recognition isolates the recognition theory effect.
327
+ # Comparing Base vs Enhanced isolates the prompt engineering effect.
328
+
329
+ # Cell 9: Enhanced × Single Agent × Unified Learner
330
+ cell_9_enhanced_single_unified:
331
+ description: "Factorial cell 9: enhanced single-agent tutor, unified learner"
332
+ factors:
333
+ prompt_type: enhanced
334
+ multi_agent_tutor: false
335
+ multi_agent_learner: false
336
+ learner_architecture: unified
337
+ recognition_mode: false
338
+ memory_enabled: false
339
+ dialogue:
340
+ enabled: false
341
+ max_rounds: 0
342
+ ego:
343
+ provider: openrouter
344
+ model: nemotron
345
+ prompt_file: tutor-ego-enhanced.md
346
+ hyperparameters:
347
+ temperature: 0.6
348
+ max_tokens: 8000
349
+ superego: null
350
+
351
+ # Cell 10: Enhanced × Single Agent × Ego/Superego Learner
352
+ cell_10_enhanced_single_psycho:
353
+ description: "Factorial cell 10: enhanced single-agent tutor, ego/superego learner"
354
+ factors:
355
+ prompt_type: enhanced
356
+ multi_agent_tutor: false
357
+ multi_agent_learner: true
358
+ learner_architecture: ego_superego
359
+ recognition_mode: false
360
+ memory_enabled: false
361
+ dialogue:
362
+ enabled: false
363
+ max_rounds: 0
364
+ ego:
365
+ provider: openrouter
366
+ model: nemotron
367
+ prompt_file: tutor-ego-enhanced.md
368
+ hyperparameters:
369
+ temperature: 0.6
370
+ max_tokens: 8000
371
+ superego: null
372
+
373
+ # Cell 11: Enhanced × Multi-Agent Tutor × Unified Learner
374
+ cell_11_enhanced_multi_unified:
375
+ description: "Factorial cell 11: enhanced ego+superego tutor, unified learner"
376
+ factors:
377
+ prompt_type: enhanced
378
+ multi_agent_tutor: true
379
+ multi_agent_learner: false
380
+ learner_architecture: unified
381
+ recognition_mode: false
382
+ memory_enabled: false
383
+ dialogue:
384
+ enabled: true
385
+ max_rounds: 2
386
+ convergence_threshold: 0.8
387
+ ego:
388
+ provider: openrouter
389
+ model: nemotron
390
+ staging: front
391
+ prompt_file: tutor-ego-enhanced.md
392
+ hyperparameters:
393
+ temperature: 0.6
394
+ max_tokens: 8000
395
+ superego:
396
+ provider: openrouter
397
+ model: kimi-k2.5
398
+ staging: back
399
+ prompt_file: tutor-superego-enhanced.md
400
+ hyperparameters:
401
+ temperature: 0.2
402
+ max_tokens: 8000
403
+
404
+ # Cell 12: Enhanced × Multi-Agent Tutor × Ego/Superego Learner
405
+ cell_12_enhanced_multi_psycho:
406
+ description: "Factorial cell 12: enhanced ego+superego tutor, ego/superego learner"
407
+ factors:
408
+ prompt_type: enhanced
409
+ multi_agent_tutor: true
410
+ multi_agent_learner: true
411
+ learner_architecture: ego_superego
412
+ recognition_mode: false
413
+ memory_enabled: false
414
+ dialogue:
415
+ enabled: true
416
+ max_rounds: 2
417
+ convergence_threshold: 0.8
418
+ ego:
419
+ provider: openrouter
420
+ model: nemotron
421
+ staging: front
422
+ prompt_file: tutor-ego-enhanced.md
423
+ hyperparameters:
424
+ temperature: 0.6
425
+ max_tokens: 8000
426
+ superego:
427
+ provider: openrouter
428
+ model: kimi-k2.5
429
+ staging: back
430
+ prompt_file: tutor-superego-enhanced.md
431
+ hyperparameters:
432
+ temperature: 0.2
433
+ max_tokens: 8000
434
+
435
+ # ===========================================================================
436
+ # HARDWIRED CELLS (13-14): Superego-derived rules, NO live superego
437
+ # ===========================================================================
438
+ # These cells test whether the superego's value is in the rules it enforces
439
+ # or in the dynamic dialogue itself.
440
+ #
441
+ # Uses tutor-ego-hardwired.md which embeds 5 rules derived from analyzing
442
+ # 186 superego rejections (engagement, specificity, memory, level-matching,
443
+ # absolute struggle stop).
444
+ #
445
+ # Comparison:
446
+ # cell_1/2 (base, no superego) vs cell_13/14 (hardwired, no superego)
447
+ # → Does hardwiring superego rules improve single-agent performance?
448
+ # cell_3/4 (base + superego) vs cell_13/14 (hardwired, no superego)
449
+ # → Can hardwired rules replace live superego dialogue?
450
+
451
+ # Cell 13: Hardwired × Single Agent × Unified Learner
452
+ cell_13_hardwired_single_unified:
453
+ description: "Factorial cell 13: hardwired single-agent tutor (superego rules embedded), unified learner"
454
+ factors:
455
+ prompt_type: hardwired
456
+ multi_agent_tutor: false
457
+ multi_agent_learner: false
458
+ learner_architecture: unified
459
+ recognition_mode: false
460
+ memory_enabled: false
461
+ dialogue:
462
+ enabled: false
463
+ max_rounds: 0
464
+ ego:
465
+ provider: openrouter
466
+ model: nemotron
467
+ prompt_file: tutor-ego-hardwired.md
468
+ hyperparameters:
469
+ temperature: 0.6
470
+ max_tokens: 8000
471
+ superego: null
472
+
473
+ # Cell 14: Hardwired × Single Agent × Ego/Superego Learner
474
+ cell_14_hardwired_single_psycho:
475
+ description: "Factorial cell 14: hardwired single-agent tutor (superego rules embedded), ego/superego learner"
476
+ factors:
477
+ prompt_type: hardwired
478
+ multi_agent_tutor: false
479
+ multi_agent_learner: true
480
+ learner_architecture: ego_superego
481
+ recognition_mode: false
482
+ memory_enabled: false
483
+ dialogue:
484
+ enabled: false
485
+ max_rounds: 0
486
+ ego:
487
+ provider: openrouter
488
+ model: nemotron
489
+ prompt_file: tutor-ego-hardwired.md
490
+ hyperparameters:
491
+ temperature: 0.6
492
+ max_tokens: 8000
493
+ superego: null
494
+
495
+ # ===========================================================================
496
+ # PLACEBO CELLS (15-18): Length-matched prompts WITHOUT recognition theory
497
+ # ===========================================================================
498
+ # These cells control for prompt length/complexity vs recognition theory content.
499
+ # Placebo prompts match recognition prompt length but use only pedagogical
500
+ # best practices without Hegelian concepts (mutual recognition, autonomous
501
+ # subject, productive struggle as transformation).
502
+ #
503
+ # Comparison:
504
+ # cell_9/10 (enhanced) vs cell_15/16 (placebo) vs cell_5/6 (recognition)
505
+ # → Isolates recognition theory value from prompt length/complexity
506
+
507
+ # Cell 15: Placebo × Single Agent × Unified Learner
508
+ cell_15_placebo_single_unified:
509
+ description: "Factorial cell 15: placebo single-agent tutor (length-matched, no recognition theory), unified learner"
510
+ factors:
511
+ prompt_type: placebo
512
+ multi_agent_tutor: false
513
+ multi_agent_learner: false
514
+ learner_architecture: unified
515
+ recognition_mode: false
516
+ memory_enabled: true
517
+ dialogue:
518
+ enabled: false
519
+ max_rounds: 0
520
+ ego:
521
+ provider: openrouter
522
+ model: nemotron
523
+ prompt_file: tutor-ego-placebo.md
524
+ hyperparameters:
525
+ temperature: 0.6
526
+ max_tokens: 8000
527
+ superego: null
528
+
529
+ # Cell 16: Placebo × Single Agent × Ego/Superego Learner
530
+ cell_16_placebo_single_psycho:
531
+ description: "Factorial cell 16: placebo single-agent tutor (length-matched, no recognition theory), ego/superego learner"
532
+ factors:
533
+ prompt_type: placebo
534
+ multi_agent_tutor: false
535
+ multi_agent_learner: true
536
+ learner_architecture: ego_superego
537
+ recognition_mode: false
538
+ memory_enabled: true
539
+ dialogue:
540
+ enabled: false
541
+ max_rounds: 0
542
+ ego:
543
+ provider: openrouter
544
+ model: nemotron
545
+ prompt_file: tutor-ego-placebo.md
546
+ hyperparameters:
547
+ temperature: 0.6
548
+ max_tokens: 8000
549
+ superego: null
550
+
551
+ # Cell 17: Placebo × Multi-Agent Tutor × Unified Learner
552
+ cell_17_placebo_multi_unified:
553
+ description: "Factorial cell 17: placebo ego+superego tutor (length-matched, no recognition theory), unified learner"
554
+ factors:
555
+ prompt_type: placebo
556
+ multi_agent_tutor: true
557
+ multi_agent_learner: false
558
+ learner_architecture: unified
559
+ recognition_mode: false
560
+ memory_enabled: true
561
+ dialogue:
562
+ enabled: true
563
+ max_rounds: 2
564
+ convergence_threshold: 0.7
565
+ ego:
566
+ provider: openrouter
567
+ model: nemotron
568
+ staging: front
569
+ prompt_file: tutor-ego-placebo.md
570
+ hyperparameters:
571
+ temperature: 0.6
572
+ max_tokens: 8000
573
+ superego:
574
+ provider: openrouter
575
+ model: kimi-k2.5
576
+ staging: back
577
+ prompt_file: tutor-superego-placebo.md
578
+ hyperparameters:
579
+ temperature: 0.2
580
+ max_tokens: 8000
581
+
582
+ # Cell 18: Placebo × Multi-Agent Tutor × Ego/Superego Learner
583
+ cell_18_placebo_multi_psycho:
584
+ description: "Factorial cell 18: placebo ego+superego tutor (length-matched, no recognition theory), ego/superego learner"
585
+ factors:
586
+ prompt_type: placebo
587
+ multi_agent_tutor: true
588
+ multi_agent_learner: true
589
+ learner_architecture: ego_superego
590
+ recognition_mode: false
591
+ memory_enabled: true
592
+ dialogue:
593
+ enabled: true
594
+ max_rounds: 2
595
+ convergence_threshold: 0.7
596
+ ego:
597
+ provider: openrouter
598
+ model: nemotron
599
+ staging: front
600
+ prompt_file: tutor-ego-placebo.md
601
+ hyperparameters:
602
+ temperature: 0.6
603
+ max_tokens: 8000
604
+ superego:
605
+ provider: openrouter
606
+ model: kimi-k2.5
607
+ staging: back
608
+ prompt_file: tutor-superego-placebo.md
609
+ hyperparameters:
610
+ temperature: 0.2
611
+ max_tokens: 8000
612
+
613
+ # ===========================================================================
614
+ # MEMORY CONFOUND ISOLATION CELLS (19-20)
615
+ # ===========================================================================
616
+ # 2×2 Memory × Recognition design (single-agent, unified learner held constant).
617
+ # Reuses cell_1 (base) and cell_5 (full recognition) to complete the 2×2.
618
+ #
619
+ # Memory OFF Memory ON
620
+ # Recog OFF cell_1 (exists) cell_19 (NEW)
621
+ # Recog ON cell_20 (NEW) cell_5 (exists)
622
+
623
+ # Cell 19: Memory-Only × Single Agent × Unified Learner
624
+ cell_19_memory_single_unified:
625
+ description: "Memory isolation: base + memory integration, no recognition theory"
626
+ factors:
627
+ prompt_type: memory
628
+ multi_agent_tutor: false
629
+ multi_agent_learner: false
630
+ learner_architecture: unified
631
+ recognition_mode: false
632
+ memory_enabled: true
633
+ dialogue:
634
+ enabled: false
635
+ max_rounds: 0
636
+ ego:
637
+ provider: openrouter
638
+ model: nemotron
639
+ prompt_file: tutor-ego-memory.md
640
+ hyperparameters:
641
+ temperature: 0.6
642
+ max_tokens: 8000
643
+ superego: null
644
+
645
+ # Cell 20: Recognition-No-Memory × Single Agent × Unified Learner
646
+ cell_20_recog_nomem_single_unified:
647
+ description: "Memory isolation: recognition theory without memory integration"
648
+ factors:
649
+ prompt_type: recognition_nomem
650
+ multi_agent_tutor: false
651
+ multi_agent_learner: false
652
+ learner_architecture: unified
653
+ recognition_mode: true
654
+ memory_enabled: false
655
+ dialogue:
656
+ enabled: false
657
+ max_rounds: 0
658
+ ego:
659
+ provider: openrouter
660
+ model: nemotron
661
+ prompt_file: tutor-ego-recognition-nomem.md
662
+ hyperparameters:
663
+ temperature: 0.6
664
+ max_tokens: 8000
665
+ superego: null
666
+
667
+ # ===========================================================================
668
+ # DYNAMIC PROMPT REWRITING CELL (21) - v2
669
+ # ===========================================================================
670
+ # Tests whether feeding deliberation insights back as directives
671
+ # improves multi-turn tutoring outcomes. Compare against cell_7 (static).
672
+ #
673
+ # v2 improvements:
674
+ # - LLM-based directive synthesis (uses superego model for rich context analysis)
675
+ # - Writing Pad memory activated via learnerId threading
676
+ # - Dialectical negotiation enabled (memory surfaces in prompts)
677
+
678
+ cell_21_recog_multi_unified_rewrite:
679
+ description: "Prompt rewriting v2: LLM directives + active Writing Pad"
680
+ factors:
681
+ prompt_type: recognition
682
+ multi_agent_tutor: true
683
+ multi_agent_learner: false
684
+ dynamic_rewriting: true
685
+ learner_architecture: unified_recognition
686
+ recognition_mode: true
687
+ memory_enabled: true
688
+ writing_pad_enabled: true # Explicitly enable Writing Pad
689
+ dialectical_negotiation: true # Memory surfaces in prompts
690
+ prompt_rewriting:
691
+ enabled: true
692
+ strategy: llm # LLM-based synthesis (vs 'template')
693
+ dialogue:
694
+ enabled: true
695
+ max_rounds: 2
696
+ convergence_threshold: 0.7
697
+ ego:
698
+ provider: openrouter
699
+ model: nemotron
700
+ staging: front
701
+ prompt_file: tutor-ego-recognition.md
702
+ hyperparameters:
703
+ temperature: 0.6
704
+ max_tokens: 8000
705
+ superego:
706
+ provider: openrouter
707
+ model: kimi-k2.5
708
+ staging: back
709
+ prompt_file: tutor-superego-recognition.md
710
+ hyperparameters:
711
+ temperature: 0.2
712
+ max_tokens: 8000
713
+ intervention_strategies:
714
+ enforce_mutual_recognition: true
715
+ require_memory_integration: true
716
+ assess_transformative_potential: true