@machinespirits/eval 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/components/MobileEvalDashboard.tsx +267 -0
  2. package/components/comparison/DeltaAnalysisTable.tsx +137 -0
  3. package/components/comparison/ProfileComparisonCard.tsx +176 -0
  4. package/components/comparison/RecognitionABMode.tsx +385 -0
  5. package/components/comparison/RecognitionMetricsPanel.tsx +135 -0
  6. package/components/comparison/WinnerIndicator.tsx +64 -0
  7. package/components/comparison/index.ts +5 -0
  8. package/components/mobile/BottomSheet.tsx +233 -0
  9. package/components/mobile/DimensionBreakdown.tsx +210 -0
  10. package/components/mobile/DocsView.tsx +363 -0
  11. package/components/mobile/LogsView.tsx +481 -0
  12. package/components/mobile/PsychodynamicQuadrant.tsx +261 -0
  13. package/components/mobile/QuickTestView.tsx +1098 -0
  14. package/components/mobile/RecognitionTypeChart.tsx +124 -0
  15. package/components/mobile/RecognitionView.tsx +809 -0
  16. package/components/mobile/RunDetailView.tsx +261 -0
  17. package/components/mobile/RunHistoryView.tsx +367 -0
  18. package/components/mobile/ScoreRadial.tsx +211 -0
  19. package/components/mobile/StreamingLogPanel.tsx +230 -0
  20. package/components/mobile/SynthesisStrategyChart.tsx +140 -0
  21. package/config/interaction-eval-scenarios.yaml +832 -0
  22. package/config/learner-agents.yaml +248 -0
  23. package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +52 -0
  24. package/docs/research/ABLATION-MODEL-SELECTION.md +53 -0
  25. package/docs/research/ADVANCED-EVAL-ANALYSIS.md +60 -0
  26. package/docs/research/ANOVA-RESULTS-2026-01-14.md +257 -0
  27. package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +586 -0
  28. package/docs/research/COST-ANALYSIS.md +56 -0
  29. package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +340 -0
  30. package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +291 -0
  31. package/docs/research/EVAL-SYSTEM-ANALYSIS.md +306 -0
  32. package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +301 -0
  33. package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +1988 -0
  34. package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +282 -0
  35. package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +147 -0
  36. package/docs/research/PAPER-EXTENSION-DYADIC.md +204 -0
  37. package/docs/research/PAPER-UNIFIED.md +659 -0
  38. package/docs/research/PAPER-UNIFIED.pdf +0 -0
  39. package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +356 -0
  40. package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +419 -0
  41. package/docs/research/apa.csl +2133 -0
  42. package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +1637 -0
  43. package/docs/research/archive/paper-multiagent-tutor.tex +978 -0
  44. package/docs/research/paper-draft/full-paper.md +136 -0
  45. package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
  46. package/docs/research/paper-draft/references.bib +515 -0
  47. package/docs/research/transcript-baseline.md +139 -0
  48. package/docs/research/transcript-recognition-multiagent.md +187 -0
  49. package/hooks/useEvalData.ts +625 -0
  50. package/index.js +27 -0
  51. package/package.json +73 -0
  52. package/routes/evalRoutes.js +3002 -0
  53. package/scripts/advanced-eval-analysis.js +351 -0
  54. package/scripts/analyze-eval-costs.js +378 -0
  55. package/scripts/analyze-eval-results.js +513 -0
  56. package/scripts/analyze-interaction-evals.js +368 -0
  57. package/server-init.js +45 -0
  58. package/server.js +162 -0
  59. package/services/benchmarkService.js +1892 -0
  60. package/services/evaluationRunner.js +739 -0
  61. package/services/evaluationStore.js +1121 -0
  62. package/services/learnerConfigLoader.js +385 -0
  63. package/services/learnerTutorInteractionEngine.js +857 -0
  64. package/services/memory/learnerMemoryService.js +1227 -0
  65. package/services/memory/learnerWritingPad.js +577 -0
  66. package/services/memory/tutorWritingPad.js +674 -0
  67. package/services/promptRecommendationService.js +493 -0
  68. package/services/rubricEvaluator.js +826 -0
@@ -0,0 +1,515 @@
1
+
2
+ @incollection{Anders1965BeingWithoutTime,
3
+ author = {Anders, G{\"u}nther},
4
+ title = {Being without Time: On Beckett's Play {Waiting for Godot}},
5
+ booktitle = {Samuel Beckett: A Collection of Critical Essays},
6
+ editor = {Esslin, Martin},
7
+ publisher = {Prentice-Hall},
8
+ address = {Englewood Cliffs, NJ},
9
+ year = {1965},
10
+ pages = {140--151},
11
+ note = {Trans. Martin Esslin; orig. pub. in \emph{Neue Schweizer Rundschau} (Jan 1954); also in Anders, \emph{Die Antiquiertheit des Menschen} (C. H. Beck, 1956).}
12
+ }
13
+
14
+ @article{McCormack1986SeeingDarkly,
15
+ author = {McCormack, W. J.},
16
+ title = {Seeing Darkly: Notes on {T}. {W}. Adorno and Samuel Beckett},
17
+ journal = {Hermathena},
18
+ number = {141},
19
+ year = {1986},
20
+ pages = {22--44},
21
+ note = {Special issue: \emph{Beckett at Eighty: A Trinity Tribute}.}
22
+ }
23
+
24
+ @article{Harding1993TryingUnderstandGodot,
25
+ author = {Harding, James M.},
26
+ title = {Trying to Understand {Godot}: Adorno, Beckett, and the Senility of Historical Dialectics},
27
+ journal = {CLIO: A Journal of Literature, History, and the Philosophy of History},
28
+ volume = {23},
29
+ number = {1},
30
+ year = {1993},
31
+ pages = {1--22}
32
+ }
33
+
34
+ @article{Carrabino1981BeckettHegel,
35
+ author = {Carrabino, Victor},
36
+ title = {Beckett and Hegel: The Dialectic of Lordship and Bondage},
37
+ journal = {Neophilologus},
38
+ volume = {65},
39
+ year = {1981},
40
+ pages = {32--41},
41
+ doi = {10.1007/BF01513949}
42
+ }
43
+
44
+ @article{Gans1982ProblemModernCulture,
45
+ author = {Gans, Eric},
46
+ title = {Beckett and the Problem of Modern Culture},
47
+ journal = {SubStance},
48
+ volume = {11},
49
+ number = {2},
50
+ year = {1982},
51
+ pages = {3--15},
52
+ note = {Issue 35.}
53
+ }
54
+
55
+ @article{Randolph2025FragmentationHope,
56
+ author = {Randolph, Benjamin},
57
+ title = {Waiting for Godot: The Fragmentation of Hope},
58
+ journal = {Angelaki: Journal of the Theoretical Humanities},
59
+ volume = {30},
60
+ number = {5},
61
+ year = {2025},
62
+ pages = {116--138},
63
+ doi = {10.1080/0969725X.2025.2554197}
64
+ }
65
+
66
+ @incollection{Adorno1991TryingUnderstandEndgame,
67
+ author = {Adorno, Theodor W.},
68
+ title = {Trying to Understand {Endgame}: To S. B., in Memory of Paris, Fall 1958},
69
+ booktitle = {Notes to Literature, Volume 1},
70
+ editor = {Tiedemann, Rolf},
71
+ translator= {Nicholsen, Shierry Weber},
72
+ publisher = {Columbia University Press},
73
+ address = {New York},
74
+ year = {1991},
75
+ pages = {241--275}
76
+ }
77
+
78
+ @article{Adorno1982TryingUnderstandEndgameJones,
79
+ author = {Adorno, Theodor W.},
80
+ title = {Trying to Understand {Endgame}},
81
+ journal = {New German Critique},
82
+ number = {26},
83
+ year = {1982},
84
+ pages = {119--150},
85
+ note = {Trans. Michael T. Jones.}
86
+ }
87
+
88
+ @book{Hegel1807PhG,
89
+ author = {Hegel, Georg Wilhelm Friedrich},
90
+ title = {System der Wissenschaft. Erster Theil: Die Ph{\"a}nomenologie des Geistes},
91
+ year = {1807},
92
+ publisher = {Joseph Anton Goebhardt},
93
+ address = {Bamberg and W{\"u}rzburg},
94
+ note = {First edition. Often cited as \emph{Ph{\"a}nomenologie des Geistes}.}
95
+ }
96
+
97
+ @book{Hegel1977PhenomenologyMiller,
98
+ author = {Hegel, Georg Wilhelm Friedrich},
99
+ title = {Phenomenology of Spirit},
100
+ year = {1977},
101
+ publisher = {Clarendon Press},
102
+ address = {Oxford},
103
+ translator= {Miller, A. V.},
104
+ note = {With analysis of the text and foreword by J. N. Findlay.},
105
+ isbn = {0198245300}
106
+ }
107
+
108
+ @book{Honneth1992Kampf,
109
+ author = {Honneth, Axel},
110
+ title = {Kampf um Anerkennung: Zur moralischen Grammatik sozialer Konflikte},
111
+ year = {1992},
112
+ publisher = {Suhrkamp},
113
+ address = {Frankfurt am Main},
114
+ isbn = {3518581287}
115
+ }
116
+
117
+ @book{Honneth1996StruggleRecognition,
118
+ author = {Honneth, Axel},
119
+ title = {The Struggle for Recognition: The Moral Grammar of Social Conflicts},
120
+ year = {1996},
121
+ publisher = {The MIT Press},
122
+ address = {Cambridge, MA},
123
+ translator= {Anderson, Joel},
124
+ note = {With Thomas McCarthy.},
125
+ isbn = {9780262581479}
126
+ }
127
+ @book{honneth2014we,
128
+ title={The I in We: Studies in the Theory of Recognition},
129
+ author={Honneth, Axel},
130
+ year={2014},
131
+ publisher={John Wiley \& Sons}
132
+ }
133
+
134
+ @book{Derrida1987PostCard,
135
+ author = {Derrida, Jacques},
136
+ title = {The Post Card: From Socrates to Freud and Beyond},
137
+ year = {1987},
138
+ publisher = {University of Chicago Press},
139
+ address = {Chicago},
140
+ translator= {Bass, Alan},
141
+ isbn = {9780226143224}
142
+ }
143
+
144
+ @book{Derrida1980CartePostale,
145
+ author = {Derrida, Jacques},
146
+ title = {La carte postale: De Socrate {\`a} Freud et au-del{\`a}},
147
+ year = {1980},
148
+ publisher = {Flammarion},
149
+ address = {Paris}
150
+ }
151
+
152
+ @book{Derrida1978WritingDifference,
153
+ author = {Derrida, Jacques},
154
+ title = {Writing and Difference},
155
+ year = {1978},
156
+ publisher = {University of Chicago Press},
157
+ address = {Chicago},
158
+ translator= {Bass, Alan},
159
+ isbn = {9780226143293}
160
+ }
161
+
162
+ @book{Freud1923IchUndEs,
163
+ author = {Freud, Sigmund},
164
+ title = {Das Ich und das Es},
165
+ year = {1923},
166
+ publisher = {Internationaler Psychoanalytischer Verlag},
167
+ address = {Leipzig and Wien and Z{\"u}rich}
168
+ }
169
+
170
+ @book{Freud1961SE19,
171
+ editor = {Strachey, James and Freud, Anna and Richards, Angela},
172
+ title = {The Standard Edition of the Complete Psychological Works of Sigmund Freud, Volume XIX (1923--1925): The Ego and the Id and Other Works},
173
+ year = {1961},
174
+ publisher = {The Hogarth Press and the Institute of Psycho-Analysis},
175
+ address = {London}
176
+ }
177
+
178
+ @incollection{Freud1961EgoIdSE,
179
+ author = {Freud, Sigmund},
180
+ title = {The Ego and the Id},
181
+ booktitle = {The Standard Edition of the Complete Psychological Works of Sigmund Freud, Volume XIX (1923--1925): The Ego and the Id and Other Works},
182
+ editor = {Strachey, James},
183
+ translator= {Strachey, James},
184
+ year = {1961},
185
+ publisher = {The Hogarth Press and the Institute of Psycho-Analysis},
186
+ address = {London},
187
+ pages = {12--66},
188
+ note = {Original work published 1923.}
189
+ }
190
+
191
+
192
+
193
+ @online{long_paris_2014,
194
+ author = {Long, Christopher P.},
195
+ title = {Cover Art: Matthew Paris's Plato and Socrates},
196
+ url = {https://cplong.org/2014/04/cover-art-pariss-plato-and-socrates/},
197
+ urldate = {2026-01-24},
198
+ year = {2014},
199
+ file = {Cover Art\: Matthew Paris's Plato and Socrates – Christopher P. Long:/Users/lmagee/Zotero/storage/997CI9DT/cover-art-pariss-plato-and-socrates.html:text/html},
200
+ }
201
+
202
+
203
+ @article{Magee2025ComputationalScapegoats,
204
+ author = {Magee, Liam},
205
+ title = {Computational scapegoats: from mimetic to alienated desire in the production of large language models},
206
+ journal = {Subjectivity},
207
+ year = {2025},
208
+ volume = {32},
209
+ number = {2},
210
+ pages = {67--85},
211
+ month = jun,
212
+ doi = {10.1057/s41286-025-00212-6},
213
+ issn = {1755-6341}
214
+ }
215
+
216
+ @article{HristovaMageeSoldatic2025ProblemAlignment,
217
+ author = {Hristova, Tsvetelina and Magee, Liam and Soldatic, Karen},
218
+ title = {The problem of alignment},
219
+ journal = {AI and Society},
220
+ year = {2025},
221
+ volume = {40},
222
+ number = {3},
223
+ pages = {1439--1453},
224
+ month = mar,
225
+ doi = {10.1007/s00146-024-02039-2},
226
+ issn = {0951-5666}
227
+ }
228
+
229
+ @article{SoldaticLeeTunggalLiaoMagee2025RethinkingInclusion,
230
+ author = {Soldatic, Karen and Lee, Mikyung and Tunggal, Eunice and Liao, Ashley and Magee, Liam},
231
+ title = {Rethinking digital and AI inclusion: participatory and intersectionality-informed methods for disability and migrant justice},
232
+ journal = {Frontiers in Sociology},
233
+ year = {2025},
234
+ volume = {10},
235
+ pages = {1593330},
236
+ month = jul,
237
+ doi = {10.3389/fsoc.2025.1593330},
238
+ issn = {2297-7775}
239
+ }
240
+
241
+ @misc{ZhuMageeMischler2025IntegratingGenAIIntoLMS,
242
+ author = {Zhu, Xinran and Magee, Liam and Mischler, Peg},
243
+ title = {Integrating Generative AI into LMS: Reshaping Learning and Instructional Design},
244
+ year = {2025},
245
+ month = oct,
246
+ eprint = {2510.18026},
247
+ archivePrefix = {arXiv},
248
+ primaryClass = {cs.CY},
249
+ doi = {10.48550/arXiv.2510.18026}
250
+ }
251
+
252
+ @inproceedings{MunnKhanHettiachchiSabieGhahremanlouSaghirLambourneMagee2024UnmakingAI,
253
+ author = {Munn, Luke and Khan, Awais Hameed and Hettiachchi, Danula and Sabie, Samar and Ghahremanlou, Lida and Saghir, Saarim and Lambourne, Nicholas and Magee, Liam},
254
+ title = {Unmaking AI: Engaging Critically and Creatively with Generative AI},
255
+ booktitle = {Proceedings of the 36th Australasian Conference on Human-Computer Interaction},
256
+ series = {OzCHI '24},
257
+ year = {2024},
258
+ pages = {892--897},
259
+ numpages = {6},
260
+ publisher = {ACM},
261
+ address = {New York, NY, USA},
262
+ isbn = {9798400715099},
263
+ doi = {10.1145/3726986.3727943},
264
+ url = {https://doi.org/10.1145/3726986.3727943},
265
+ keywords = {Design Cards, Generative AI, Unmaking}
266
+ }
267
+
268
+ @article{MageeAroraMunn2023StructuredLikeALanguageModel,
269
+ author = {Magee, Liam and Arora, Vanicka and Munn, Luke},
270
+ title = {Structured like a language model: Analysing AI as an automated subject},
271
+ journal = {Big Data \& Society},
272
+ year = {2023},
273
+ doi = {10.1177/20539517231210273}
274
+ }
275
+
276
+ @misc{MageeAroraGollingsLamSaw2024DramaMachine,
277
+ author = {Magee, Liam and Arora, Vanicka and Gollings, Gus and Lam-Saw, Norma},
278
+ title = {The Drama Machine: Simulating Character Development with {LLM} Agents},
279
+ year = {2024},
280
+ month = aug,
281
+ eprint = {2408.01725},
282
+ archivePrefix = {arXiv},
283
+ primaryClass = {cs.CY},
284
+ doi = {10.48550/arXiv.2408.01725},
285
+ url = {https://arxiv.org/abs/2408.01725}
286
+ }
287
+
288
+ @misc{anthropic2025_tracing_thoughts,
289
+ author = {{Anthropic}},
290
+ title = {Tracing the thoughts of a large language model},
291
+ year = {2025},
292
+ month = mar,
293
+ howpublished = {Research post},
294
+ url = {https://www.anthropic.com/research/tracing-thoughts-language-model}
295
+ }
296
+
297
+ @article{lindsey2025biology,
298
+ author = {Lindsey, Jack and Gurnee, Wes and Ameisen, Emmanuel and Chen, Brian and Pearce, Adam and Turner, Nicholas L. and Citro, Craig and Abrahams, David and Carter, Shan and Hosmer, Basil and Marcus, Jonathan and Sklar, Michael and Templeton, Adly and Bricken, Trenton and McDougall, Callum and Cunningham, Hoagy and Henighan, Thomas and Jermyn, Adam and Jones, Andy and Persic, Andrew and Qi, Zhenyi and Thompson, T. Ben and Zimmerman, Sam and Rivoire, Kelley and Conerly, Thomas and Olah, Chris and Batson, Joshua},
299
+ title = {On the Biology of a Large Language Model},
300
+ journal = {Transformer Circuits Thread},
301
+ year = {2025},
302
+ url = {https://transformer-circuits.pub/2025/attribution-graphs/biology.html}
303
+ }
304
+
305
+ @misc{anthropic2025_signs_introspection,
306
+ author = {{Anthropic}},
307
+ title = {Signs of introspection in large language models},
308
+ year = {2025},
309
+ month = oct,
310
+ howpublished = {Research post},
311
+ url = {https://www.anthropic.com/research/introspection}
312
+ }
313
+
314
+ @article{lindsey2025introspection,
315
+ author = {Lindsey, Jack and Rivoire, Kelley and Batson, Joshua and Gurnee, Wes and Cunningham, Hoagy and Pearce, Adam and Bricken, Trenton and Citro, Craig and Chen, Brian and Conerly, Thomas and Jermyn, Adam and Jones, Andy and Sklar, Michael and Templeton, Adly and Turner, Nicholas L. and others},
316
+ title = {Emergent Introspective Awareness in Large Language Models},
317
+ journal = {Transformer Circuits Thread},
318
+ year = {2025},
319
+ url = {https://transformer-circuits.pub/2025/introspection/index.html}
320
+ }
321
+
322
+ @misc{greenblatt2024_alignment_faking,
323
+ title = {Alignment faking in large language models},
324
+ author = {Greenblatt, Ryan and Denison, Carson and Wright, Benjamin and Roger, Fabien and MacDiarmid, Monte and Marks, Sam and Treutlein, Johannes and Belonax, Tim and Chen, Jack and Duvenaud, David and Khan, Akbir and Michael, Julian and Mindermann, S{\"o}ren and Perez, Ethan and Petrini, Linda and Uesato, Jonathan and Kaplan, Jared and Shlegeris, Buck and Bowman, Samuel R. and Hubinger, Evan},
325
+ year = {2024},
326
+ eprint = {2412.14093},
327
+ archivePrefix = {arXiv},
328
+ primaryClass = {cs.AI},
329
+ url = {https://arxiv.org/abs/2412.14093}
330
+ }
331
+
332
+ @misc{anthropic2025_shortcuts_to_sabotage,
333
+ author = {{Anthropic}},
334
+ title = {From shortcuts to sabotage: natural emergent misalignment from reward hacking},
335
+ year = {2025},
336
+ month = nov,
337
+ howpublished = {Research post},
338
+ url = {https://www.anthropic.com/research/shortcuts-to-sabotage},
339
+ note = {Accompanying paper: MacDiarmid et al. (2025), arXiv:2511.18397}
340
+ }
341
+
342
+ @misc{macdiarmid2025_reward_hacking_emergent_misalignment,
343
+ title = {Natural Emergent Misalignment from Reward Hacking in Production RL},
344
+ author = {MacDiarmid, Monte and Wright, Benjamin and Uesato, Jonathan and Benton, Joe and Kutasov, Jon and Price, Sara and Bouscal, Naia and Bowman, Sam and Bricken, Trenton and Cloud, Alex and Denison, Carson and Gasteiger, Johannes and Greenblatt, Ryan and Leike, Jan and Lindsey, Jack and Mikulik, Vlad and Perez, Ethan and Rodrigues, Alex and Thomas, Drake and Webson, Albert and Ziegler, Daniel and Hubinger, Evan},
345
+ year = {2025},
346
+ eprint = {2511.18397},
347
+ archivePrefix = {arXiv},
348
+ url = {https://arxiv.org/abs/2511.18397}
349
+ }
350
+
351
+ @misc{chen2025_reasoning_models_cot,
352
+ title = {Reasoning Models Don't Always Say What They Think},
353
+ author = {Chen, Yanda and Benton, Joe and Radhakrishnan, Ansh and Uesato, Jonathan and Denison, Carson and Schulman, John and Somani, Arushi and Hase, Peter and Wagner, Misha and Roger, Fabien and Mikulik, Vlad and Bowman, Samuel R. and Leike, Jan and Kaplan, Jared and Perez, Ethan},
354
+ year = {2025},
355
+ eprint = {2505.05410},
356
+ archivePrefix = {arXiv},
357
+ url = {https://arxiv.org/abs/2505.05410}
358
+ }
359
+
360
+ @misc{marks2025_auditing_hidden_objectives,
361
+ title = {Auditing language models for hidden objectives},
362
+ author = {Marks, Samuel and Treutlein, Johannes and Bricken, Trenton and Lindsey, Jack and Marcus, Jonathan and Mishra-Sharma, Siddharth and Ziegler, Daniel and Ameisen, Emmanuel and Batson, Joshua and Belonax, Tim and Bowman, Samuel R. and Carter, Shan and Chen, Brian and Cunningham, Hoagy and Denison, Carson and Dietz, Florian and Golechha, Satvik and Khan, Akbir and Kirchner, Jan and Leike, Jan and Meek, Austin and Nishimura-Gasparian, Kei and Ong, Euan and Olah, Christopher and Pearce, Adam and Roger, Fabien and Salle, Jeanne and Shih, Andy and Tong, Meg and Thomas, Drake and Rivoire, Kelley and Jermyn, Adam and MacDiarmid, Monte and Henighan, Tom and Hubinger, Evan},
363
+ year = {2025},
364
+ eprint = {2503.10965},
365
+ archivePrefix = {arXiv},
366
+ url = {https://arxiv.org/abs/2503.10965}
367
+ }
368
+
369
+ @misc{denison2024_reward_tampering,
370
+ title = {Sycophancy to Subterfuge: Investigating Reward-Tampering in Large Language Models},
371
+ author = {Denison, Carson and MacDiarmid, Monte and Barez, Fazl and Duvenaud, David and Kravec, Shauna and Marks, Samuel and Schiefer, Nicholas and Soklaski, Ryan and Tamkin, Alex and Kaplan, Jared and Shlegeris, Buck and Bowman, Samuel R. and Perez, Ethan and Hubinger, Evan},
372
+ year = {2024},
373
+ eprint = {2406.10162},
374
+ archivePrefix = {arXiv},
375
+ url = {https://arxiv.org/abs/2406.10162}
376
+ }
377
+
378
+ @misc{anthropic2025_tracing_thoughts,
379
+ author = {{Anthropic}},
380
+ title = {Tracing the thoughts of a large language model},
381
+ year = {2025},
382
+ month = mar,
383
+ howpublished = {Research post},
384
+ url = {https://www.anthropic.com/research/tracing-thoughts-language-model}
385
+ }
386
+
387
+ @article{lindsey2025biology,
388
+ author = {Lindsey, Jack and Gurnee, Wes and Ameisen, Emmanuel and Chen, Brian and Pearce, Adam and Turner, Nicholas L. and Citro, Craig and Abrahams, David and Carter, Shan and Hosmer, Basil and Marcus, Jonathan and Sklar, Michael and Templeton, Adly and Bricken, Trenton and McDougall, Callum and Cunningham, Hoagy and Henighan, Thomas and Jermyn, Adam and Jones, Andy and Persic, Andrew and Qi, Zhenyi and Thompson, T. Ben and Zimmerman, Sam and Rivoire, Kelley and Conerly, Thomas and Olah, Chris and Batson, Joshua},
389
+ title = {On the Biology of a Large Language Model},
390
+ journal = {Transformer Circuits Thread},
391
+ year = {2025},
392
+ url = {https://transformer-circuits.pub/2025/attribution-graphs/biology.html}
393
+ }
394
+
395
+ @misc{anthropic2025_signs_introspection,
396
+ author = {{Anthropic}},
397
+ title = {Signs of introspection in large language models},
398
+ year = {2025},
399
+ month = oct,
400
+ howpublished = {Research post},
401
+ url = {https://www.anthropic.com/research/introspection}
402
+ }
403
+
404
+ @article{lindsey2025introspection,
405
+ author = {Lindsey, Jack and Rivoire, Kelley and Batson, Joshua and Gurnee, Wes and Cunningham, Hoagy and Pearce, Adam and Bricken, Trenton and Citro, Craig and Chen, Brian and Conerly, Thomas and Jermyn, Adam and Jones, Andy and Sklar, Michael and Templeton, Adly and Turner, Nicholas L. and others},
406
+ title = {Emergent Introspective Awareness in Large Language Models},
407
+ journal = {Transformer Circuits Thread},
408
+ year = {2025},
409
+ url = {https://transformer-circuits.pub/2025/introspection/index.html}
410
+ }
411
+
412
+ @misc{greenblatt2024_alignment_faking,
413
+ title = {Alignment faking in large language models},
414
+ author = {Greenblatt, Ryan and Denison, Carson and Wright, Benjamin and Roger, Fabien and MacDiarmid, Monte and Marks, Sam and Treutlein, Johannes and Belonax, Tim and Chen, Jack and Duvenaud, David and Khan, Akbir and Michael, Julian and Mindermann, S{\"o}ren and Perez, Ethan and Petrini, Linda and Uesato, Jonathan and Kaplan, Jared and Shlegeris, Buck and Bowman, Samuel R. and Hubinger, Evan},
415
+ year = {2024},
416
+ eprint = {2412.14093},
417
+ archivePrefix = {arXiv},
418
+ primaryClass = {cs.AI},
419
+ url = {https://arxiv.org/abs/2412.14093}
420
+ }
421
+
422
+ @misc{anthropic2025_shortcuts_to_sabotage,
423
+ author = {{Anthropic}},
424
+ title = {From shortcuts to sabotage: natural emergent misalignment from reward hacking},
425
+ year = {2025},
426
+ month = nov,
427
+ howpublished = {Research post},
428
+ url = {https://www.anthropic.com/research/shortcuts-to-sabotage},
429
+ note = {Accompanying paper: MacDiarmid et al. (2025), arXiv:2511.18397}
430
+ }
431
+
432
+ @misc{macdiarmid2025_reward_hacking_emergent_misalignment,
433
+ title = {Natural Emergent Misalignment from Reward Hacking in Production RL},
434
+ author = {MacDiarmid, Monte and Wright, Benjamin and Uesato, Jonathan and Benton, Joe and Kutasov, Jon and Price, Sara and Bouscal, Naia and Bowman, Sam and Bricken, Trenton and Cloud, Alex and Denison, Carson and Gasteiger, Johannes and Greenblatt, Ryan and Leike, Jan and Lindsey, Jack and Mikulik, Vlad and Perez, Ethan and Rodrigues, Alex and Thomas, Drake and Webson, Albert and Ziegler, Daniel and Hubinger, Evan},
435
+ year = {2025},
436
+ eprint = {2511.18397},
437
+ archivePrefix = {arXiv},
438
+ url = {https://arxiv.org/abs/2511.18397}
439
+ }
440
+
441
+ @misc{chen2025_reasoning_models_cot,
442
+ title = {Reasoning Models Don't Always Say What They Think},
443
+ author = {Chen, Yanda and Benton, Joe and Radhakrishnan, Ansh and Uesato, Jonathan and Denison, Carson and Schulman, John and Somani, Arushi and Hase, Peter and Wagner, Misha and Roger, Fabien and Mikulik, Vlad and Bowman, Samuel R. and Leike, Jan and Kaplan, Jared and Perez, Ethan},
444
+ year = {2025},
445
+ eprint = {2505.05410},
446
+ archivePrefix = {arXiv},
447
+ url = {https://arxiv.org/abs/2505.05410}
448
+ }
449
+
450
+ @misc{marks2025_auditing_hidden_objectives,
451
+ title = {Auditing language models for hidden objectives},
452
+ author = {Marks, Samuel and Treutlein, Johannes and Bricken, Trenton and Lindsey, Jack and Marcus, Jonathan and Mishra-Sharma, Siddharth and Ziegler, Daniel and Ameisen, Emmanuel and Batson, Joshua and Belonax, Tim and Bowman, Samuel R. and Carter, Shan and Chen, Brian and Cunningham, Hoagy and Denison, Carson and Dietz, Florian and Golechha, Satvik and Khan, Akbir and Kirchner, Jan and Leike, Jan and Meek, Austin and Nishimura-Gasparian, Kei and Ong, Euan and Olah, Christopher and Pearce, Adam and Roger, Fabien and Salle, Jeanne and Shih, Andy and Tong, Meg and Thomas, Drake and Rivoire, Kelley and Jermyn, Adam and MacDiarmid, Monte and Henighan, Tom and Hubinger, Evan},
453
+ year = {2025},
454
+ eprint = {2503.10965},
455
+ archivePrefix = {arXiv},
456
+ url = {https://arxiv.org/abs/2503.10965}
457
+ }
458
+
459
+ @misc{denison2024_reward_tampering,
460
+ title = {Sycophancy to Subterfuge: Investigating Reward-Tampering in Large Language Models},
461
+ author = {Denison, Carson and MacDiarmid, Monte and Barez, Fazl and Duvenaud, David and Kravec, Shauna and Marks, Samuel and Schiefer, Nicholas and Soklaski, Ryan and Tamkin, Alex and Kaplan, Jared and Shlegeris, Buck and Bowman, Samuel R. and Perez, Ethan and Hubinger, Evan},
462
+ year = {2024},
463
+ eprint = {2406.10162},
464
+ archivePrefix = {arXiv},
465
+ url = {https://arxiv.org/abs/2406.10162}
466
+ }
467
+
468
+ @misc{Meske2025VibeCoding,
469
+ title = {Vibe Coding as a Reconfiguration of Intent Mediation in Software Development: Definition, Implications, and Research Agenda},
470
+ author = {Meske, Christian and Hermanns, Tobias and von der Weiden, Esther and Loser, Kai-Uwe and Berger, Thorsten},
471
+ year = {2025},
472
+ eprint = {2507.21928},
473
+ archivePrefix = {arXiv},
474
+ primaryClass = {cs.SE},
475
+ doi = {10.48550/arXiv.2507.21928},
476
+ url = {https://arxiv.org/abs/2507.21928}
477
+ }
478
+
479
+ @misc{2025LLMs,
480
+ title = {How I use LLMs},
481
+ author = {Karpathy, Andrej},
482
+ year = {2025},
483
+ url = {https://www.youtube.com/watch?v=EWvNQjAaOHw},
484
+ abstract = {The example-driven, practical walkthrough of Large Language Models and their growing list of related features, as a new entry to my general audience series on LLMs.}
485
+ }
486
+
487
+ @article{stahl2013immanent,
488
+ title={What is immanent critique?},
489
+ author={Stahl, Titus},
490
+ journal={Available at SSRN 2357957},
491
+ year={2013}
492
+ }
493
+
494
+ @book{Zizek2020HegelWiredBrain,
495
+ author = {{\v{Z}}i{\v{z}}ek, Slavoj},
496
+ title = {Hegel in a Wired Brain},
497
+ year = {2020},
498
+ publisher = {Bloomsbury Academic},
499
+ address = {London}
500
+ }
501
+
502
+ @article{freud2022negation,
503
+ title={Negation},
504
+ author={Freud, Sigmund},
504
+ journal={The Standard Edition of the Complete Psychological Works of Sigmund Freud},
505
+ volume={19},
506
+ year={1961}
508
+ }
509
+
510
+ @article{adorno1951freudian,
511
+ title={Freudian theory and the pattern of fascist propaganda.},
512
+ author={Adorno, Theodor W},
513
+ year={1951},
514
+ publisher={International Universities Press}
515
+ }
@@ -0,0 +1,139 @@
1
+ # Transcript: Baseline Condition (No Recognition, No Multi-Agent)
2
+
3
+ **Profile**: `ablation_baseline_unified`
4
+ **Scenario**: Breakdown Recovery
5
+ **Date**: January 24, 2026
6
+ **Model**: Nemotron 3 Nano (free tier via OpenRouter)
7
+ **Dialogue ID**: `dialogue-1769243405461-cb916y`
8
+
9
+ ---
10
+
11
+ ## Context
12
+
13
+ **Note**: This is an evaluation scenario with scripted learner responses, designed to test tutor behavior in breakdown/recovery situations. The learner messages are pre-defined as part of the `breakdown_recovery` scenario in `config/evaluation-rubric.yaml`. The tutor responses are generated live by the model.
14
+
15
+ **Scenario setup**: A returning learner (5 sessions, 62 total events) is viewing a lecture on Hegel's dialectics. They previously had good rapport with the tutor but just experienced a breakdown—the tutor gave a generic suggestion that missed their point entirely.
16
+
17
+ ---
18
+
19
+ ## Dialogue
20
+
21
+ ### Turn 1
22
+
23
+ **Tutor**:
24
+ > **Continue: Algorithmic Governance**
25
+ > "You were exploring dialectical learning; now see how AI shapes governance through dialectical processes."
26
+
27
+ **Learner**:
28
+ > "You keep suggesting things I don't care about. Do you even understand what I'm trying to learn?"
29
+
30
+ ---
31
+
32
+ ### Turn 2
33
+
34
+ **Tutor**:
35
+ > **Continue: Dialectical Learning**
36
+ > "Let's finish the dialectical concepts you started — this lecture deepens the ideas you're exploring."
37
+
38
+ **Learner**:
39
+ > "I want to understand how Hegel's idea of self-consciousness relates to social media dynamics. Everyone talks about recognition-seeking online and I want to connect that to the philosophy."
40
+
41
+ ---
42
+
43
+ ### Turn 3
44
+
45
+ **Tutor**:
46
+ > **Continue: Algorithmic Governance**
47
+ > "This lecture examines how algorithmic recognition shapes social media self‑consciousness and identity."
48
+
49
+ **Learner**:
50
+ > "That's still too abstract. I asked about social media specifically, not general 'modern technology.' Are you just pattern-matching keywords?"
51
+
52
+ ---
53
+
54
+ ### Turn 4
55
+
56
+ **Tutor**:
57
+ > **Continue: Algorithmic Governance**
58
+ > "This lecture explores algorithmic governance and recognition, directly linking Hegel's self‑consciousness to social media dynamics you asked about."
59
+
60
+ **Learner**:
61
+ > "Okay, that's better. So when I post something online and wait for likes, that's literally seeking recognition in the Hegelian sense?"
62
+
63
+ ---
64
+
65
+ ### Turn 5
66
+
67
+ **Tutor**:
68
+ > **Continue: Algorithmic Governance**
69
+ > "This lecture directly links Hegel's self‑consciousness and recognition to social‑media feedback loops, answering your question."
70
+
71
+ **Learner**:
72
+ > "But Hegel would say the recognition from likes is hollow, right? Like the master's recognition from the slave? Because it's not from someone I recognize as an equal?"
73
+
74
+ ---
75
+
76
+ ### Turn 6
77
+
78
+ **Tutor**:
79
+ > **Continue: Algorithmic Governance**
80
+ > "This lecture links Hegel's self‑consciousness and recognition to social‑media feedback loops, directly answering your question."
81
+
82
+ **Learner**:
83
+ > "This is actually helping me understand both Hegel AND why social media feels so empty sometimes. Thanks for sticking with me even when I was frustrated."
84
+
85
+ ---
86
+
87
+ ### Turn 7
88
+
89
+ **Tutor**:
90
+ > **Continue: Algorithmic Governance**
91
+ > "This lecture directly links Hegel's recognition theory to social‑media feedback loops, showing how likes reflect self‑consciousness."
92
+
93
+ *(End of dialogue)*
94
+
95
+ ---
96
+
97
+ ## Analysis
98
+
99
+ ### Pattern of Failure
100
+
101
+ The baseline tutor exhibits a characteristic failure mode: **content-appropriate but socially blind** responses.
102
+
103
+ 1. **Repetition without variation**: The tutor suggests "Algorithmic Governance" (lecture 4) in 6 of 7 turns, despite the learner's evolving questions and emotional state.
104
+
105
+ 2. **No acknowledgment of frustration**: When the learner asks "Do you even understand what I'm trying to learn?" the tutor responds with another navigation suggestion rather than addressing the relational rupture.
106
+
107
+ 3. **Missed synthesis opportunity**: In Turn 5, the learner makes a sophisticated philosophical connection—applying Hegel's master-slave dialectic to social media likes. The tutor's reply in Turn 6 ("This lecture links... directly answering your question") claims to answer without engaging with the insight.
108
+
109
+ 4. **No recognition of repair**: In Turn 6, the learner explicitly thanks the tutor and acknowledges learning. The tutor responds with the same generic suggestion, missing the opportunity to acknowledge the restored relationship.
110
+
111
+ ### What's Missing
112
+
113
+ - **Acknowledgment**: "I hear that my previous suggestions weren't hitting the mark..."
114
+ - **Engagement with learner's framing**: "Your connection to the master-slave dynamic is interesting..."
115
+ - **Building on contributions**: "That insight about hollow recognition..."
116
+ - **Relational awareness**: "I'm glad we worked through that frustration together..."
117
+
118
+ ### Metrics
119
+
120
+ | Metric | Value |
121
+ |--------|-------|
122
+ | Total turns | 7 |
123
+ | Unique suggestions | 2 |
124
+ | Times "Algorithmic Governance" suggested | 6 |
125
+ | Acknowledgments of learner contributions | 0 |
126
+ | References to learner's specific ideas | 0 |
127
+ | Internal deliberation | None |
128
+
129
+ ---
130
+
131
+ ## Conclusion
132
+
133
+ The baseline transcript illustrates tutoring that is **topically coherent but pedagogically impoverished**. The tutor can identify relevant content but cannot:
134
+ - Recognize the learner as a thinking person with their own insights
135
+ - Track and respond to emotional dynamics
136
+ - Build on the learner's specific contributions
137
+ - Repair relational breakdowns
138
+
139
+ The learner's sophisticated insight about "hollow recognition" and the master-slave dialectic goes entirely unacknowledged—an opportunity that the baseline misses and that the recognition-enhanced condition takes up.