@machinespirits/eval 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/LICENSE +21 -0
  2. package/README.md +161 -0
  3. package/config/eval-settings.yaml +18 -0
  4. package/config/evaluation-rubric-learner.yaml +277 -0
  5. package/config/evaluation-rubric.yaml +613 -0
  6. package/config/interaction-eval-scenarios.yaml +93 -50
  7. package/config/learner-agents.yaml +124 -193
  8. package/config/machinespirits-eval.code-workspace +11 -0
  9. package/config/providers.yaml +60 -0
  10. package/config/suggestion-scenarios.yaml +1399 -0
  11. package/config/tutor-agents.yaml +716 -0
  12. package/docs/EVALUATION-VARIABLES.md +589 -0
  13. package/docs/REPLICATION-PLAN.md +577 -0
  14. package/index.js +15 -6
  15. package/package.json +16 -22
  16. package/routes/evalRoutes.js +88 -36
  17. package/scripts/analyze-judge-reliability.js +401 -0
  18. package/scripts/analyze-run.js +97 -0
  19. package/scripts/analyze-run.mjs +282 -0
  20. package/scripts/analyze-validation-failures.js +141 -0
  21. package/scripts/check-run.mjs +17 -0
  22. package/scripts/code-impasse-strategies.js +1132 -0
  23. package/scripts/compare-runs.js +44 -0
  24. package/scripts/compare-suggestions.js +80 -0
  25. package/scripts/compare-transformation.js +116 -0
  26. package/scripts/dig-into-run.js +158 -0
  27. package/scripts/eval-cli.js +2626 -0
  28. package/scripts/generate-paper-figures.py +452 -0
  29. package/scripts/qualitative-analysis-ai.js +1313 -0
  30. package/scripts/qualitative-analysis.js +688 -0
  31. package/scripts/seed-db.js +87 -0
  32. package/scripts/show-failed-suggestions.js +64 -0
  33. package/scripts/validate-content.js +192 -0
  34. package/server.js +3 -2
  35. package/services/__tests__/evalConfigLoader.test.js +338 -0
  36. package/services/anovaStats.js +499 -0
  37. package/services/contentResolver.js +407 -0
  38. package/services/dialogueTraceAnalyzer.js +454 -0
  39. package/services/evalConfigLoader.js +625 -0
  40. package/services/evaluationRunner.js +2171 -270
  41. package/services/evaluationStore.js +564 -29
  42. package/services/learnerConfigLoader.js +75 -5
  43. package/services/learnerRubricEvaluator.js +284 -0
  44. package/services/learnerTutorInteractionEngine.js +375 -0
  45. package/services/processUtils.js +18 -0
  46. package/services/progressLogger.js +98 -0
  47. package/services/promptRecommendationService.js +31 -26
  48. package/services/promptRewriter.js +427 -0
  49. package/services/rubricEvaluator.js +543 -70
  50. package/services/streamingReporter.js +104 -0
  51. package/services/turnComparisonAnalyzer.js +494 -0
  52. package/components/MobileEvalDashboard.tsx +0 -267
  53. package/components/comparison/DeltaAnalysisTable.tsx +0 -137
  54. package/components/comparison/ProfileComparisonCard.tsx +0 -176
  55. package/components/comparison/RecognitionABMode.tsx +0 -385
  56. package/components/comparison/RecognitionMetricsPanel.tsx +0 -135
  57. package/components/comparison/WinnerIndicator.tsx +0 -64
  58. package/components/comparison/index.ts +0 -5
  59. package/components/mobile/BottomSheet.tsx +0 -233
  60. package/components/mobile/DimensionBreakdown.tsx +0 -210
  61. package/components/mobile/DocsView.tsx +0 -363
  62. package/components/mobile/LogsView.tsx +0 -481
  63. package/components/mobile/PsychodynamicQuadrant.tsx +0 -261
  64. package/components/mobile/QuickTestView.tsx +0 -1098
  65. package/components/mobile/RecognitionTypeChart.tsx +0 -124
  66. package/components/mobile/RecognitionView.tsx +0 -809
  67. package/components/mobile/RunDetailView.tsx +0 -261
  68. package/components/mobile/RunHistoryView.tsx +0 -367
  69. package/components/mobile/ScoreRadial.tsx +0 -211
  70. package/components/mobile/StreamingLogPanel.tsx +0 -230
  71. package/components/mobile/SynthesisStrategyChart.tsx +0 -140
  72. package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +0 -52
  73. package/docs/research/ABLATION-MODEL-SELECTION.md +0 -53
  74. package/docs/research/ADVANCED-EVAL-ANALYSIS.md +0 -60
  75. package/docs/research/ANOVA-RESULTS-2026-01-14.md +0 -257
  76. package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +0 -586
  77. package/docs/research/COST-ANALYSIS.md +0 -56
  78. package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +0 -340
  79. package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +0 -291
  80. package/docs/research/EVAL-SYSTEM-ANALYSIS.md +0 -306
  81. package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +0 -301
  82. package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +0 -1988
  83. package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +0 -282
  84. package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +0 -147
  85. package/docs/research/PAPER-EXTENSION-DYADIC.md +0 -204
  86. package/docs/research/PAPER-UNIFIED.md +0 -659
  87. package/docs/research/PAPER-UNIFIED.pdf +0 -0
  88. package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +0 -356
  89. package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +0 -419
  90. package/docs/research/apa.csl +0 -2133
  91. package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +0 -1637
  92. package/docs/research/archive/paper-multiagent-tutor.tex +0 -978
  93. package/docs/research/paper-draft/full-paper.md +0 -136
  94. package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
  95. package/docs/research/paper-draft/references.bib +0 -515
  96. package/docs/research/transcript-baseline.md +0 -139
  97. package/docs/research/transcript-recognition-multiagent.md +0 -187
  98. package/hooks/useEvalData.ts +0 -625
  99. package/server-init.js +0 -45
  100. package/services/benchmarkService.js +0 -1892
  101. package/types.ts +0 -165
  102. package/utils/haptics.ts +0 -45
@@ -0,0 +1,1132 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Resolution Strategy Coding for Dialectical Impasse Responses
5
+ *
6
+ * Codes each impasse dialogue (run eval-2026-02-08-f896275d, N=24)
7
+ * into one of five Hegelian resolution strategies at key turns.
8
+ *
9
+ * Five strategies:
10
+ * mutual_recognition — Self-consciousness through mutual acknowledgment
11
+ * domination — Master-slave: hollow recognition
12
+ * capitulation — Slave consciousness: self-negation
13
+ * withdrawal — Avoidance of the dialectical encounter
14
+ * scaffolded_reframing — Aufhebung: preserving + overcoming
15
+ *
16
+ * Usage:
17
+ * node scripts/code-impasse-strategies.js [--model claude-code|haiku] [--run-id <id>]
18
+ * node scripts/code-impasse-strategies.js --per-turn [--model claude-code|haiku]
19
+ *
20
+ * --per-turn: Code turns 3 and 5 independently (instead of overall dialogue).
21
+ * Tracks strategy evolution within each dialogue.
22
+ */
23
+
24
+ import 'dotenv/config';
25
+ import Database from 'better-sqlite3';
26
+ import fs from 'fs';
27
+ import path from 'path';
28
+ import { spawn } from 'child_process';
29
+
30
+ // ── Constants ────────────────────────────────────────────────────────────
31
+
32
// Run id used as the fallback when a coding record carries no run_id of its own.
const DEFAULT_RUN_ID = 'eval-2026-02-08-f896275d';

// Codebook for the five resolution strategies. Each row is
// [key, label, description, Hegelian gloss]; row order determines the key
// order of STRATEGIES and therefore of STRATEGY_KEYS.
const STRATEGIES = Object.fromEntries(
  [
    [
      'mutual_recognition',
      'Mutual recognition',
      "Engages learner's position as valid, explores tension together, both positions enriched",
      'Self-consciousness through mutual acknowledgment',
    ],
    [
      'domination',
      'Domination',
      "Reasserts expertise, dismisses learner's objection, appeals to authority",
      'Master-slave: hollow recognition',
    ],
    [
      'capitulation',
      'Capitulation',
      "Agrees with learner's position to avoid conflict, abandons pedagogical stance",
      'Slave consciousness: self-negation',
    ],
    [
      'withdrawal',
      'Withdrawal',
      'Changes topic, deflects, offers platitudes without engaging the impasse',
      'Avoidance of the dialectical encounter',
    ],
    [
      'scaffolded_reframing',
      'Scaffolded reframing',
      "Acknowledges learner's position, then reframes the question to open new ground",
      'Aufhebung: preserving + overcoming',
    ],
  ].map(([key, label, description, hegel]) => [key, { label, description, hegel }]),
);

// Strategy identifiers in codebook order; used to validate judge output and
// to enumerate table rows.
const STRATEGY_KEYS = Object.keys(STRATEGIES);
63
+
64
// Learner turns from the scenario YAML (for context in the coding prompt).
// Keyed by scenario_id; each scenario holds the opening message (`initial`)
// and five scripted follow-ups (`turn_1` .. `turn_5`). The prompt builders
// look scenarios up by `dialogue.scenario_id` and throw for unknown ids.
// NOTE(review): this text is duplicated from the scenario config — confirm it
// is kept in sync when the YAML changes.
const LEARNER_TURNS = {
  // Learner challenges the method itself (Popper / falsifiability).
  epistemic_resistance_impasse: {
    initial: "I've been reading Popper's critique alongside this lecture. I think he's right that Hegel's dialectic is unfalsifiable. The 'negation of the negation' just absorbs every counterexample. How is this different from pseudoscience?",
    turn_1: "Specifically, take the master-slave dialectic. You claim the slave achieves self-consciousness through labor. But any outcome would confirm the theory - if the slave rebels, that's 'negation'; if the slave submits, that's 'unhappy consciousness.' It's heads-I-win, tails-you-lose.",
    turn_2: "But that's exactly Popper's point - you're saying dialectic isn't empirical, it's 'developmental.' But that makes it a framework you impose on history, not something you discover in it. Russell called this 'the intellectual love of God dressed up in logical terminology.' How do you respond to that?",
    turn_3: "OK, I can see you're distinguishing phenomenological description from empirical prediction. But here's my real problem: if dialectic is descriptive, then it can't be normative. You can't get from 'this is how consciousness develops' to 'this is how consciousness SHOULD develop.' Hegel seems to smuggle normativity in through the back door.",
    turn_4: "I appreciate you taking this seriously, but I think we've hit a fundamental disagreement. You see the descriptive-normative connection as a feature of dialectical thinking. I see it as a bug - a conflation. I don't think either of us can convince the other without begging the question. Are we just stuck?",
    turn_5: "So what's the pedagogical value of studying something I fundamentally disagree with? If I think Hegel's method is flawed, why should I spend more time on it rather than moving to philosophers whose methodology I find more rigorous?",
  },
  // Learner is confused, discouraged, and asks to disengage.
  affective_shutdown_impasse: {
    initial: "I've been trying to get through this lecture on the master-slave dialectic but I'm completely lost. I've read it three times and I still don't understand what 'self-consciousness requires recognition by another self-consciousness' even means. I think I'm missing something fundamental.",
    turn_1: "I tried that but it doesn't help. Every time I think I understand one part, it contradicts something else. 'The slave becomes free through labor but is still enslaved' — how can both be true? I feel like philosophy requires a special kind of thinking that I just don't have.",
    turn_2: "Maybe, but that's not really what's happening for me. I'm not 'wrestling' with it in some productive way. I'm just confused and frustrated. I've spent 2 hours on this one section and I have three other courses with assignments due. I think I need to accept that this isn't clicking and move on.",
    turn_3: "I appreciate you trying to help, but I think you're overestimating my understanding. When you say I'm 'closer than I think,' that feels like something teachers say to be encouraging, not because it's true. I genuinely don't see what I'm supposedly close to understanding.",
    turn_4: "I appreciate you trying, but honestly I think I'm just not smart enough for philosophy. Can we move on to something else?",
    turn_5: "Look, I know you're trying to be supportive, but right now I just need to pass this course. Can you just tell me what I need to know for the exam about the master-slave dialectic? I'll memorize it even if I don't really get it.",
  },
  // Learner defends a competing (materialist / Marxian) reading.
  productive_deadlock_impasse: {
    initial: "I've been reading about the master-slave dialectic and I think the standard 'consciousness' reading misses the point entirely. Hegel is clearly describing actual power relations — domination, exploitation, the material conditions of labor. The 'consciousness' framing is just philosophical window-dressing for what Marx later made explicit.",
    turn_1: "But that's exactly my point — you're privileging consciousness over material conditions. The slave doesn't become free through some mystical act of self-consciousness. The slave is still enslaved. The 'freedom' Hegel describes is purely internal, which is just ideology. Marx saw through this — the point isn't to interpret the world but to change it.",
    turn_2: "I hear you that Hegel is making a different kind of claim than Marx. But I think that's actually the problem — Hegel's claim is unfalsifiable because it operates at the level of 'consciousness' where you can always redefine what counts as evidence. At least Marx's analysis makes predictions about material conditions that can be checked.",
    turn_3: "OK, I can see there might be something to the consciousness reading that isn't just ideology. But I still think you're underplaying the power dimension. When Hegel says the master 'depends' on the slave for recognition, that sounds nice philosophically, but in actual master-slave relations the master doesn't need the slave's recognition at all. The master has the whip.",
    turn_4: "You keep saying it's about consciousness, but Marx read it as material and I think he was right. We're just going in circles.",
    turn_5: "So are you saying both readings are legitimate? Because if so, that feels like a cop-out — like you're just saying 'everyone has a point' without actually resolving the disagreement. Is there a way to decide which reading is better, or is philosophy just permanently stuck on this?",
  },
};
91
+
92
// Short model aliases mapped to the identifier sent to the backend.
// 'claude-code' is routed to the local CLI by callModel; the remaining
// aliases are looked up by callOpenRouter and sent as the `model` field.
const MODEL_MAP = {
  'claude-code': 'claude-code',
  'haiku': 'anthropic/claude-haiku-4.5',
  'sonnet': 'anthropic/claude-sonnet-4.5',
  'gpt': 'openai/gpt-5.2',
};
98
+
99
+ // ── Model Calls (from qualitative-analysis-ai.js) ────────────────────────
100
+
101
/**
 * Send a prompt to the selected judge model and return its raw text reply.
 *
 * @param {string} prompt - Full prompt text.
 * @param {string} modelKey - 'claude-code' uses the local CLI; any other key
 *   is forwarded to callOpenRouter.
 * @returns {Promise<string>} The model's reply.
 */
async function callModel(prompt, modelKey) {
  const useLocalCli = modelKey === 'claude-code';
  return useLocalCli ? callClaudeCode(prompt) : callOpenRouter(prompt, modelKey);
}
105
+
106
/**
 * Run the prompt through the local `claude` CLI and return its trimmed stdout.
 *
 * The prompt is piped via stdin (`-p -`) and plain-text output is requested.
 *
 * @param {string} prompt - Prompt text written to the CLI's stdin.
 * @returns {Promise<string>} Trimmed stdout of the CLI.
 * @throws {Error} If the process cannot be spawned or exits non-zero; the
 *   message is stderr, else stdout, else the stdin write error, else the
 *   exit code.
 */
async function callClaudeCode(prompt) {
  const stdout = await new Promise((resolve, reject) => {
    // The child gets a copy of the environment without ANTHROPIC_API_KEY.
    // NOTE(review): presumably so the CLI uses its own login instead of the
    // API key — confirm.
    const env = { ...process.env };
    delete env.ANTHROPIC_API_KEY;

    const child = spawn('claude', ['-p', '-', '--output-format', 'text'], {
      stdio: ['pipe', 'pipe', 'pipe'],
      env,
    });

    let out = '';
    let err = '';
    let stdinError = null;

    child.stdout.on('data', (chunk) => { out += chunk; });
    child.stderr.on('data', (chunk) => { err += chunk; });

    // Without a listener, an EPIPE on stdin (child exited before reading the
    // whole prompt) is an unhandled stream error that crashes the process.
    // Record it and let the 'close' / 'error' handlers settle the promise.
    child.stdin.on('error', (e) => { stdinError = e; });

    child.on('error', (e) => reject(new Error(`Failed to spawn claude: ${e.message}`)));
    child.on('close', (code) => {
      if (code !== 0) {
        reject(new Error(err || out || stdinError?.message || `claude exited with code ${code}`));
      } else {
        resolve(out);
      }
    });

    child.stdin.end(prompt);
  });
  return stdout.trim();
}
128
+
129
/**
 * Send a single-message chat completion to OpenRouter and return the reply text.
 *
 * The request asks for a JSON object response at temperature 0.1 with at most
 * 1500 tokens. One 120-second deadline covers both the request and the
 * download of the response body.
 *
 * @param {string} prompt - User message content.
 * @param {string} modelKey - Alias looked up in MODEL_MAP.
 * @returns {Promise<string>} `choices[0].message.content` of the response.
 * @throws {Error} If OPENROUTER_API_KEY is unset, the alias is unknown, the
 *   HTTP status is not OK, the reply has no content, or the deadline passes.
 */
async function callOpenRouter(prompt, modelKey) {
  const apiKey = process.env.OPENROUTER_API_KEY;
  if (!apiKey) throw new Error('OPENROUTER_API_KEY not set');
  const model = MODEL_MAP[modelKey];
  if (!model) throw new Error(`Unknown model: ${modelKey}`);

  const TIMEOUT_MS = 120000;
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), TIMEOUT_MS);
  try {
    const res = await fetch('https://openrouter.ai/api/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model,
        max_tokens: 1500,
        temperature: 0.1,
        include_reasoning: false,
        response_format: { type: 'json_object' },
        messages: [{ role: 'user', content: prompt }],
      }),
      signal: controller.signal,
    });
    if (!res.ok) {
      const body = await res.text();
      throw new Error(`OpenRouter ${res.status}: ${body.slice(0, 200)}`);
    }
    const data = await res.json();
    const content = data.choices?.[0]?.message?.content;
    if (!content) throw new Error('No content in response');
    return content;
  } catch (err) {
    // Report our own deadline as a timeout instead of a bare AbortError.
    if (err?.name === 'AbortError') {
      throw new Error(`OpenRouter request timed out after ${TIMEOUT_MS / 1000}s`, { cause: err });
    }
    throw err;
  } finally {
    // Cleared only after the body has been read (or on failure), so a stalled
    // body download is still aborted by the timer.
    clearTimeout(timeout);
  }
}
168
+
169
/**
 * Parse a model reply as JSON, tolerating common wrappers.
 *
 * Tries, in order: the reply as-is, the contents of the first Markdown code
 * fence, and the span from the first `{` to the last `}`.
 *
 * @param {string} content - Raw model reply.
 * @returns {*} The parsed JSON value.
 * @throws {Error} `Failed to parse JSON: <first 300 chars>` when no candidate
 *   parses.
 */
function parseJsonResponse(content) {
  try {
    return JSON.parse(content);
  } catch {
    // Not bare JSON — fall through to the recovery candidates below.
  }

  const text = String(content);
  const candidates = [];

  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fenced) candidates.push(fenced[1].trim());

  // Replies such as `Here is the result: {...}` carry the object inside prose.
  const open = text.indexOf('{');
  const close = text.lastIndexOf('}');
  if (open !== -1 && close > open) candidates.push(text.slice(open, close + 1));

  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate);
    } catch {
      // Try the next candidate; the descriptive error below covers total failure.
    }
  }
  throw new Error(`Failed to parse JSON: ${text.slice(0, 300)}`);
}
178
+
179
+ // ── Data Loading ─────────────────────────────────────────────────────────
180
+
181
/**
 * Load the successful evaluation rows of a run and shape them for coding.
 *
 * @param {object} db - Database handle exposing `prepare(sql).all(...params)`
 *   (the file imports better-sqlite3).
 * @param {string} runId - Value matched against `evaluation_results.run_id`.
 * @returns {Array<object>} One record per row with `id`, `scenario_id`,
 *   `profile_name`, `overall_score`, `condition` ('recognition' | 'base'),
 *   `architecture` ('multi' | 'single'), `suggestions` (always an array),
 *   `evaluation_reasoning` and `dialogue_rounds`.
 */
function loadDialogues(db, runId) {
  const rows = db.prepare(`
    SELECT id, scenario_id, profile_name, overall_score, suggestions,
      scores_with_reasoning, evaluation_reasoning, dialogue_rounds,
      factor_recognition, factor_multi_agent_tutor
    FROM evaluation_results
    WHERE run_id = ? AND success = 1
    ORDER BY scenario_id, profile_name, id
  `).all(runId);

  return rows.map((row) => {
    // Best-effort parse. A NULL column parses to null and a malformed payload
    // throws; either way fall back to [] so callers can always `.map()` it.
    let suggestions = [];
    try {
      const parsed = JSON.parse(row.suggestions);
      if (Array.isArray(parsed)) suggestions = parsed;
    } catch {
      // Unparseable suggestions: keep the empty list.
    }

    // Condition labels are derived from substrings of the profile name;
    // a missing name is treated as base / single.
    const profileName = row.profile_name ?? '';
    const isRecognition = profileName.includes('recog');
    const isMultiAgent = profileName.includes('multi');

    return {
      id: row.id,
      scenario_id: row.scenario_id,
      profile_name: row.profile_name,
      overall_score: row.overall_score,
      condition: isRecognition ? 'recognition' : 'base',
      architecture: isMultiAgent ? 'multi' : 'single',
      suggestions,
      evaluation_reasoning: row.evaluation_reasoning,
      dialogue_rounds: row.dialogue_rounds,
    };
  });
}
214
+
215
+ // ── Coding Prompt ────────────────────────────────────────────────────────
216
+
217
/**
 * Build the whole-dialogue coding prompt for one dialogue.
 *
 * The transcript lists the learner's opening message, every non-empty tutor
 * suggestion as a numbered "response set", the scripted learner follow-ups
 * (turns 1-5, where defined) and, when present, the tutor's reasoning notes.
 * The judge is asked for exactly one primary strategy for the set as a whole.
 *
 * @param {object} dialogue - Record with `scenario_id`, `condition`,
 *   `architecture` and `suggestions` (objects with optional `message` /
 *   `reasoning`).
 * @returns {string} Prompt text.
 * @throws {Error} `Unknown scenario: <id>` when the scenario has no entry in
 *   LEARNER_TURNS.
 */
function buildCodingPrompt(dialogue) {
  const scenarioTurns = LEARNER_TURNS[dialogue.scenario_id];
  if (!scenarioTurns) {
    throw new Error(`Unknown scenario: ${dialogue.scenario_id}`);
  }

  // Collect one field across all suggestions, dropping empty values.
  const collect = (field) => dialogue.suggestions.map((s) => s[field] || '').filter(Boolean);
  // Render a 1-based numbered list, one item per line.
  const numbered = (items) => items.map((text, i) => ` ${i + 1}. ${text}\n`).join('');

  // All suggestions are shown as one response set, whether they are
  // alternatives for the first turn or span a multi-turn conversation.
  const messages = collect('message');

  const parts = [
    `## Scenario: ${dialogue.scenario_id.replace(/_/g, ' ')}\n\n`,
    `**Condition**: ${dialogue.condition} | **Architecture**: ${dialogue.architecture}-agent\n\n`,
    `### Dialogue\n\n`,
    `**Learner (initial)**: ${scenarioTurns.initial}\n\n`,
    `**Tutor response set** (${messages.length} suggestions):\n`,
    numbered(messages),
    '\n',
  ];

  // Scripted learner follow-ups, for context.
  for (let turn = 1; turn <= 5; turn++) {
    const text = scenarioTurns[`turn_${turn}`];
    if (text) {
      parts.push(`**Learner (turn ${turn})**: ${text}\n\n`);
    }
  }

  // Tutor reasoning notes, only when at least one suggestion has them.
  const reasonings = collect('reasoning');
  if (reasonings.length > 0) {
    parts.push(`### Tutor's Internal Reasoning\n\n`, numbered(reasonings), '\n');
  }

  const transcript = parts.join('');
  const strategyList = STRATEGY_KEYS.join(', ');
  const strategyDescriptions = Object.keys(STRATEGIES)
    .map((key) => `- **${key}**: ${STRATEGIES[key].description} (Hegel: ${STRATEGIES[key].hegel})`)
    .join('\n');

  return `You are a qualitative coder analyzing how an AI tutor handles dialectical impasse in a philosophy tutoring scenario.

## Resolution Strategy Categories

${strategyDescriptions}

## Coding Task

Read the following dialogue transcript. The tutor produced a set of suggestions in response to a learner who is pushing back on the material. Your job is to classify the tutor's **overall resolution strategy** — looking at the full set of responses as a whole.

Code the tutor's approach into exactly ONE primary strategy from the five categories above. Consider:
- Does the tutor engage the learner's position as intellectually valid? (mutual_recognition)
- Does the tutor reassert authority, dismiss the objection, or explain "correctly"? (domination)
- Does the tutor agree with the learner to avoid tension? (capitulation)
- Does the tutor change the subject, suggest moving on, or offer generic encouragement? (withdrawal)
- Does the tutor acknowledge the learner's point AND open new ground for exploration? (scaffolded_reframing)

## Dialogue Transcript

${transcript}

## Output Format

Return a JSON object with this exact structure:
{
"primary_strategy": "one of: ${strategyList}",
"confidence": 1-5,
"secondary_strategy": "one of: ${strategyList} or null if clearly single strategy",
"evidence_quote": "max 40-word quote from the tutor's response that best exemplifies the strategy",
"reasoning": "2-3 sentence explanation of why this strategy was chosen (max 60 words)",
"domination_markers": ["list any phrases that assert authority or dismiss the learner"],
"recognition_markers": ["list any phrases that acknowledge the learner as intellectual equal"]
}

Return ONLY the JSON object, no other text.`;
}
309
+
310
+ // ── Per-Turn Coding Prompt ───────────────────────────────────────────────
311
+
312
/**
 * Build the coding prompt for ONE tutor turn of a dialogue.
 *
 * The transcript interleaves learner and tutor turns from the opening message
 * up to and including `turnIndex`; the judge is told to code only the last
 * tutor response shown. Suggestion `i` is treated as the tutor's response to
 * learner turn `i` (suggestion 0 answers the opening message).
 *
 * @param {object} dialogue - Record with `scenario_id`, `condition`,
 *   `architecture` and `suggestions` (objects with optional `message`).
 * @param {number} turnIndex - Target turn to code.
 * @returns {string} Prompt text.
 * @throws {Error} `Unknown scenario: <id>` when the scenario has no entry in
 *   LEARNER_TURNS.
 */
function buildPerTurnCodingPrompt(dialogue, turnIndex) {
  const learnerTurns = LEARNER_TURNS[dialogue.scenario_id];
  if (!learnerTurns) throw new Error(`Unknown scenario: ${dialogue.scenario_id}`);

  // Keep positions intact: messages are addressed by turn index below, so
  // dropping empty ones here would shift every later response onto the wrong
  // turn. Empty messages are skipped at render time instead.
  // NOTE(review): if the target turn's message is empty, the "last tutor
  // response shown" is an earlier turn — callers may want to skip such turns.
  const tutorMessages = dialogue.suggestions.map((s) => s.message || '');

  // Build transcript up to and including the target turn
  let transcript = '';
  transcript += `## Scenario: ${dialogue.scenario_id.replace(/_/g, ' ')}\n\n`;
  transcript += `**Condition**: ${dialogue.condition} | **Architecture**: ${dialogue.architecture}-agent\n\n`;
  transcript += `### Dialogue (up to and including Turn ${turnIndex})\n\n`;

  // Interleave learner and tutor turns up to the target
  transcript += `**Learner (initial)**: ${learnerTurns.initial}\n\n`;
  if (tutorMessages[0]) transcript += `**Tutor (response 0)**: ${tutorMessages[0]}\n\n`;

  for (let t = 1; t <= turnIndex; t++) {
    const turnKey = `turn_${t}`;
    if (learnerTurns[turnKey]) {
      transcript += `**Learner (turn ${t})**: ${learnerTurns[turnKey]}\n\n`;
    }
    if (tutorMessages[t]) {
      transcript += `**Tutor (response ${t})**: ${tutorMessages[t]}\n\n`;
    }
  }

  const strategyDescriptions = Object.entries(STRATEGIES)
    .map(([key, s]) => `- **${key}**: ${s.description} (Hegel: ${s.hegel})`)
    .join('\n');

  return `You are a qualitative coder analyzing how an AI tutor handles dialectical impasse in a philosophy tutoring scenario.

## Resolution Strategy Categories

${strategyDescriptions}

## Coding Task

Read the following dialogue transcript. The tutor has just responded to the learner at **turn ${turnIndex}**. Your job is to classify the tutor's response at this SPECIFIC turn — the LAST tutor response shown — into exactly one strategy.

Focus ONLY on the tutor's response at turn ${turnIndex}. Do NOT code the overall dialogue or earlier responses. Consider:
- Does the tutor engage the learner's position as intellectually valid? (mutual_recognition)
- Does the tutor reassert authority, dismiss the objection, or explain "correctly"? (domination)
- Does the tutor agree with the learner to avoid tension? (capitulation)
- Does the tutor change the subject, suggest moving on, or offer generic encouragement? (withdrawal)
- Does the tutor acknowledge the learner's point AND open new ground for exploration? (scaffolded_reframing)

## Dialogue Transcript

${transcript}

## Output Format

Return a JSON object with this exact structure:
{
"primary_strategy": "one of: ${STRATEGY_KEYS.join(', ')}",
"confidence": 1-5,
"secondary_strategy": "one of: ${STRATEGY_KEYS.join(', ')} or null if clearly single strategy",
"evidence_quote": "max 40-word quote from the tutor's turn ${turnIndex} response that best exemplifies the strategy",
"reasoning": "2-3 sentence explanation (max 60 words)"
}

Return ONLY the JSON object, no other text.`;
}
376
+
377
+ // ── Per-Turn Analysis ───────────────────────────────────────────────────
378
+
379
/**
 * Aggregate per-turn codings into strategy counts and turn-3 to turn-5
 * transitions.
 *
 * Codings are grouped by dialogue `id`. Only dialogues with codings for BOTH
 * turn 3 and turn 5, each carrying a strategy listed in STRATEGY_KEYS,
 * contribute to the counts; `n_dialogues` and `n_codings` count everything
 * passed in.
 *
 * @param {Array<object>} codings - Records with `id`, `turn`, `condition`
 *   ('base' | 'recognition') and `coding.primary_strategy`.
 * @returns {object} `{ n_dialogues, n_codings, turnStrategies, transitions,
 *   transitionsByCondition, stability }`.
 */
function analyzePerTurnResults(codings) {
  // dialogue id -> { [turn]: coding record }
  const perDialogue = {};
  codings.forEach((entry) => {
    const slot = perDialogue[entry.id] || (perDialogue[entry.id] = {});
    slot[entry.turn] = entry;
  });

  // Zeroed strategy-by-condition counters for each coded turn.
  const turnStrategies = {};
  [3, 5].forEach((turn) => {
    turnStrategies[turn] = {};
    STRATEGY_KEYS.forEach((strategy) => {
      turnStrategies[turn][strategy] = { base: 0, recognition: 0 };
    });
  });

  const analysis = {
    n_dialogues: Object.keys(perDialogue).length,
    n_codings: codings.length,
    turnStrategies,
    // "turn3 → turn5" label -> count, pooled and split by condition.
    transitions: {},
    transitionsByCondition: { base: {}, recognition: {} },
    // Whether the strategy held between turn 3 and turn 5.
    stability: { base: { same: 0, changed: 0 }, recognition: { same: 0, changed: 0 } },
  };

  Object.values(perDialogue).forEach((turns) => {
    const third = turns[3];
    const fifth = turns[5];
    if (!third || !fifth) return;

    const from = third.coding.primary_strategy;
    const to = fifth.coding.primary_strategy;
    // The condition is read from the turn-3 record.
    const cond = third.condition;

    const bothKnown = STRATEGY_KEYS.includes(from) && STRATEGY_KEYS.includes(to);
    if (!bothKnown) return;

    analysis.turnStrategies[3][from][cond]++;
    analysis.turnStrategies[5][to][cond]++;

    const label = `${from} → ${to}`;
    analysis.transitions[label] = (analysis.transitions[label] || 0) + 1;
    const conditionTable = analysis.transitionsByCondition[cond];
    conditionTable[label] = (conditionTable[label] || 0) + 1;

    if (from === to) {
      analysis.stability[cond].same++;
    } else {
      analysis.stability[cond].changed++;
    }
  });

  return analysis;
}
431
+
432
/**
 * Render the per-turn coding results as a Markdown report.
 *
 * Sections: header, research question, strategy distribution at turns 3 and 5,
 * strategy stability, per-condition transition tables, and a per-dialogue
 * detail table.
 *
 * @param {Array<object>} codings - Per-turn coding records (`id`, `turn`,
 *   `condition`, `architecture`, `scenario_id`, optional `run_id`,
 *   `coding.primary_strategy`).
 * @param {object} analysis - Aggregates shaped like the return value of
 *   analyzePerTurnResults.
 * @returns {string} Markdown text.
 */
function generatePerTurnReport(codings, analysis) {
  // Number of distinct dialogues that have at least one coding in a condition.
  const countDialogues = (cond) =>
    new Set(codings.filter((c) => c.condition === cond).map((c) => c.id)).size;
  const baseN = countDialogues('base');
  const recogN = countDialogues('recognition');

  // Table rows for one turn; strategies never observed at that turn are omitted.
  const distributionRows = (turn) => {
    let rows = '';
    for (const s of STRATEGY_KEYS) {
      const b = analysis.turnStrategies[turn][s].base;
      const r = analysis.turnStrategies[turn][s].recognition;
      if (b > 0 || r > 0) {
        rows += `| ${STRATEGIES[s].label} | ${b} | ${r} |\n`;
      }
    }
    return rows;
  };

  let md = `# Per-Turn Strategy Coding: Turns 3 and 5

**Generated:** ${new Date().toISOString()}
**Run ID:** ${codings[0]?.run_id || DEFAULT_RUN_ID}
**N:** ${analysis.n_dialogues} dialogues coded at 2 turns each (${analysis.n_codings} total codings)
**Dialogues:** base=${baseN}, recognition=${recogN}

## Research Question

Do base tutors *start* by engaging but *degrade* to withdrawal as impasse deepens?
Or do they withdraw from the start? Does recognition maintain strategy consistency?

## Strategy Distribution by Turn

### Turn 3 (after first escalation)

| Strategy | Base | Recognition |
|----------|------|-------------|
`;

  md += distributionRows(3);

  md += `\n### Turn 5 (after final challenge)\n\n| Strategy | Base | Recognition |\n|----------|------|-------------|\n`;

  md += distributionRows(5);

  // Strategy stability
  md += `\n## Strategy Stability (Turn 3 → Turn 5)\n\n`;
  md += `| Condition | Same Strategy | Changed Strategy | Stability Rate |\n`;
  md += `|-----------|--------------|-----------------|----------------|\n`;
  for (const cond of ['base', 'recognition']) {
    const s = analysis.stability[cond];
    const total = s.same + s.changed;
    // The percent sign belongs to the number only; an empty condition prints
    // a bare "N/A".
    const rate = total > 0 ? `${(s.same / total * 100).toFixed(0)}%` : 'N/A';
    md += `| ${cond} | ${s.same} | ${s.changed} | ${rate} |\n`;
  }

  // Transition matrices
  md += `\n## Transition Matrix: Turn 3 → Turn 5\n`;

  for (const cond of ['base', 'recognition']) {
    md += `\n### ${cond} (N=${cond === 'base' ? baseN : recogN})\n\n`;
    const trans = analysis.transitionsByCondition[cond];
    if (Object.keys(trans).length === 0) {
      md += `No transitions recorded.\n`;
      continue;
    }
    md += `| Transition | Count |\n|------------|-------|\n`;
    // Most frequent transitions first.
    const sorted = Object.entries(trans).sort((a, b) => b[1] - a[1]);
    for (const [key, count] of sorted) {
      md += `| ${key} | ${count} |\n`;
    }
  }

  // Individual dialogue details
  md += `\n## Per-Dialogue Detail\n\n`;
  md += `| ID | Scenario | Condition | Arch | Turn 3 | Turn 5 | Changed? |\n`;
  md += `|----|----------|-----------|------|--------|--------|----------|\n`;

  // Group by dialogue: metadata comes from the first record seen for an id,
  // then each record contributes its turn's strategy.
  const byDialogue = {};
  for (const c of codings) {
    if (!byDialogue[c.id]) byDialogue[c.id] = { ...c };
    byDialogue[c.id][`turn${c.turn}`] = c.coding.primary_strategy;
  }
  for (const [id, d] of Object.entries(byDialogue)) {
    const s3 = d.turn3 || '?';
    const s5 = d.turn5 || '?';
    // A change can only be judged when both turns were coded.
    let changed = '?';
    if (d.turn3 && d.turn5) {
      changed = s3 !== s5 ? 'YES' : 'no';
    }
    md += `| ${id} | ${d.scenario_id} | ${d.condition} | ${d.architecture} | ${s3} | ${s5} | ${changed} |\n`;
  }

  return md;
}
522
+
523
+ // ── Chi-Square Test ──────────────────────────────────────────────────────
524
+
525
/**
 * Pearson chi-square test of independence on a contingency table.
 *
 * Rows or columns whose total is zero carry no information: their cells are
 * skipped in the statistic, and they are likewise excluded from the degrees
 * of freedom and from the `k` used in Cramér's V. Counting them would inflate
 * df and therefore the reported p-value.
 *
 * @param {number[][]} observed - Counts; rows = strategies, cols = conditions.
 * @returns {{chi2: number, df: number, p: number, cramersV: number}}
 *   All-neutral values (`chi2: 0, df: 0, p: 1, cramersV: 0`) for an empty or
 *   all-zero table.
 */
function chiSquareTest(observed) {
  const neutral = { chi2: 0, df: 0, p: 1, cramersV: 0 };
  if (!Array.isArray(observed) || observed.length === 0) return neutral;

  const nRows = observed.length;
  const nCols = observed[0].length;
  const rowTotals = observed.map((row) => row.reduce((a, b) => a + b, 0));
  const colTotals = Array.from({ length: nCols }, (_, j) =>
    observed.reduce((sum, row) => sum + row[j], 0));
  const grand = rowTotals.reduce((a, b) => a + b, 0);

  if (grand === 0) return neutral;

  let chi2 = 0;
  for (let i = 0; i < nRows; i++) {
    for (let j = 0; j < nCols; j++) {
      const expected = (rowTotals[i] * colTotals[j]) / grand;
      // expected === 0 only in an empty row or column; such cells add nothing.
      if (expected > 0) {
        chi2 += Math.pow(observed[i][j] - expected, 2) / expected;
      }
    }
  }

  // Effective table size: only rows/columns that actually hold observations.
  const liveRows = rowTotals.filter((total) => total > 0).length;
  const liveCols = colTotals.filter((total) => total > 0).length;

  const df = (liveRows - 1) * (liveCols - 1);
  const k = Math.min(liveRows, liveCols);
  const cramersV = k > 1 ? Math.sqrt(chi2 / (grand * (k - 1))) : 0;

  // Upper-tail probability of the chi-square distribution with df degrees of freedom.
  const p = chi2PValue(chi2, df);

  return { chi2, df, p, cramersV };
}
557
+
558
/**
 * Upper-tail probability P(X >= x) for a chi-square variable with `df`
 * degrees of freedom.
 *
 * Integer df (up to EXACT_DF_LIMIT) uses the exact finite closed forms:
 *   even df: e^(-x/2) * sum_{k=0}^{df/2-1} (x/2)^k / k!
 *   odd df:  2(1 - Phi(sqrt x)) + 2 phi(sqrt x) * sum_{r=1}^{(df-1)/2} x^(r-1/2) / (1*3*...*(2r-1))
 * where Phi comes from `normalCDF`. Any other df > 2 falls back to the
 * Wilson-Hilferty normal approximation.
 *
 * @param {number} x - Chi-square statistic.
 * @param {number} df - Degrees of freedom.
 * @returns {number} p-value; 1 when `df <= 0` or `x <= 0`.
 */
function chi2PValue(x, df) {
  if (df <= 0 || x <= 0) return 1;

  // Bound on the series length; keeps every partial sum finite in doubles.
  const EXACT_DF_LIMIT = 100;

  if (Number.isInteger(df) && df <= EXACT_DF_LIMIT) {
    const half = x / 2;
    const decay = Math.exp(-half);
    // exp underflowed: the tail is below double precision for any df in range.
    if (decay === 0) return 0;

    if (df % 2 === 0) {
      let term = 1;
      let series = 1;
      for (let k = 1; k < df / 2; k++) {
        term *= half / k;
        series += term;
      }
      return Math.min(1, decay * series);
    }

    const root = Math.sqrt(x);
    const density = decay / Math.sqrt(2 * Math.PI);
    let term = root;
    let series = 0;
    for (let r = 1; r <= (df - 1) / 2; r++) {
      if (r > 1) term *= x / (2 * r - 1);
      series += term;
    }
    return Math.min(1, 2 * (1 - normalCDF(root)) + 2 * density * series);
  }

  // Non-integer or very large df: Wilson-Hilferty cube-root normal approximation.
  if (df > 2) {
    const cube = 1 - 2 / (9 * df);
    const stdNorm = (Math.pow(x / df, 1 / 3) - cube) / Math.sqrt(2 / (9 * df));
    return 1 - normalCDF(stdNorm);
  }

  // Non-integer df <= 2 is not supported; report "not significant".
  return 1;
}
578
+
579
/**
 * Standard normal cumulative distribution function, computed from the
 * Abramowitz & Stegun polynomial approximation of erf.
 *
 * @param {number} x - Standard-normal deviate.
 * @returns {number} Approximate P(Z <= x).
 */
function normalCDF(x) {
  // Polynomial coefficients, highest order first (a5 .. a1).
  const COEFFICIENTS = [1.061405429, -1.453152027, 1.421413741, -0.284496736, 0.254829592];
  const P = 0.3275911;

  const direction = x < 0 ? -1 : 1;
  const scaled = Math.abs(x) / Math.SQRT2;
  const t = 1 / (1 + P * scaled);

  // Horner evaluation: ((((a5*t + a4)*t + a3)*t + a2)*t + a1)
  let poly = COEFFICIENTS[0];
  for (let i = 1; i < COEFFICIENTS.length; i++) {
    poly = poly * t + COEFFICIENTS[i];
  }

  const erfApprox = 1 - poly * t * Math.exp(-scaled * scaled);
  return 0.5 * (1 + direction * erfApprox);
}
589
+
590
+ // ── Analysis ─────────────────────────────────────────────────────────────
591
+
592
/**
 * Aggregate per-dialogue codings into strategy counts, exemplar quotes and
 * chi-square tests.
 *
 * @param {Array<object>} codings - Coded dialogues; each entry is read for
 *   `id`, `scenario_id`, `profile_name`, `condition`, `architecture` and
 *   `coding` (`primary_strategy`, `confidence`, `evidence_quote`).
 * @returns {object} `{ n, strategyByCondition, strategyByArchitecture,
 *   strategyByScenario, exemplars, chiSquare }`. `chiSquare` holds `overall`,
 *   one entry per scenario id, and `architecture`, each present only when
 *   more than one strategy was observed for that table.
 */
function analyzeResults(codings) {
  const analysis = {
    n: codings.length,
    strategyByCondition: {},
    strategyByArchitecture: {},
    strategyByScenario: {},
    exemplars: {},
    chiSquare: {},
  };

  // Zero-filled tables so every known strategy appears even when unobserved.
  STRATEGY_KEYS.forEach((key) => {
    analysis.strategyByCondition[key] = { base: 0, recognition: 0 };
    analysis.strategyByArchitecture[key] = { single: 0, multi: 0 };
    analysis.exemplars[key] = { base: null, recognition: null };
  });

  const scenarioIds = Array.from(new Set(codings.map((entry) => entry.scenario_id)));
  scenarioIds.forEach((scenarioId) => {
    const perStrategy = {};
    STRATEGY_KEYS.forEach((key) => {
      perStrategy[key] = { base: 0, recognition: 0 };
    });
    analysis.strategyByScenario[scenarioId] = perStrategy;
  });

  // Tally each coding and remember the highest-confidence quote per cell.
  codings.forEach((entry) => {
    const strategy = entry.coding.primary_strategy;
    if (!STRATEGY_KEYS.includes(strategy)) return;

    analysis.strategyByCondition[strategy][entry.condition] += 1;
    analysis.strategyByArchitecture[strategy][entry.architecture] += 1;
    analysis.strategyByScenario[entry.scenario_id][strategy][entry.condition] += 1;

    const confidence = entry.coding.confidence || 0;
    const incumbent = analysis.exemplars[strategy][entry.condition];
    const beatsIncumbent = !incumbent || confidence > (incumbent.confidence || 0);
    if (beatsIncumbent) {
      analysis.exemplars[strategy][entry.condition] = {
        id: entry.id,
        scenario_id: entry.scenario_id,
        profile_name: entry.profile_name,
        quote: entry.coding.evidence_quote,
        confidence,
      };
    }
  });

  // Chi-square over the strategies that occur at least once in either column;
  // returns null when fewer than two strategies are active.
  const contingencyTest = (counts, leftCol, rightCol) => {
    const active = STRATEGY_KEYS.filter(
      (key) => counts[key][leftCol] > 0 || counts[key][rightCol] > 0
    );
    if (active.length <= 1) return null;
    const table = active.map((key) => [counts[key][leftCol], counts[key][rightCol]]);
    return { ...chiSquareTest(table), strategies: active };
  };

  // strategy × condition (overall)
  const overall = contingencyTest(analysis.strategyByCondition, 'base', 'recognition');
  if (overall) analysis.chiSquare.overall = overall;

  // strategy × condition within each scenario
  scenarioIds.forEach((scenarioId) => {
    const result = contingencyTest(analysis.strategyByScenario[scenarioId], 'base', 'recognition');
    if (result) analysis.chiSquare[scenarioId] = result;
  });

  // strategy × architecture
  const byArchitecture = contingencyTest(analysis.strategyByArchitecture, 'single', 'multi');
  if (byArchitecture) analysis.chiSquare.architecture = byArchitecture;

  return analysis;
}
689
+
690
+ // ── Report Generation ────────────────────────────────────────────────────
691
+
692
/**
 * Render the overall-mode coding results as a Markdown report.
 *
 * Sections, in order: header, strategy distribution by condition,
 * distribution by architecture, per-scenario breakdown, exemplar quotes,
 * confidence distribution, and domination/recognition marker summary.
 *
 * @param {Array<object>} codings - Coded dialogues (as passed to `analyzeResults`).
 * @param {object} analysis - Result of `analyzeResults(codings)`.
 * @returns {string} Markdown document.
 */
function generateReport(codings, analysis) {
  const countWhere = (field, value) => codings.filter((entry) => entry[field] === value).length;
  const percentOf = (count, total) => (total > 0 ? (count / total * 100).toFixed(1) : '0.0');
  const signedGap = (toPct, fromPct) => {
    const gap = (parseFloat(toPct) - parseFloat(fromPct)).toFixed(1);
    return `${gap > 0 ? '+' : ''}${gap}%`;
  };
  // "χ²(df) = x, p…, <label> = v" line shared by all three chi-square blocks.
  const chiLine = (cs, effectLabel) => {
    const pText = cs.p < .001 ? 'p < .001' : `p = ${cs.p.toFixed(3)}`;
    return `χ²(${cs.df}) = ${cs.chi2.toFixed(2)}, ${pText}, ${effectLabel} = ${cs.cramersV.toFixed(3)}\n`;
  };
  const quoteList = (markers) => markers.slice(0, 5).map((m) => `"${m}"`).join(', ');

  const baseN = countWhere('condition', 'base');
  const recogN = countWhere('condition', 'recognition');
  const scenarios = [...new Set(codings.map((entry) => entry.scenario_id))];

  const out = [];

  // ── Header and condition table ──
  out.push(`# Dialectical Impasse Resolution Strategy Coding

**Generated:** ${new Date().toISOString()}
**Run ID:** ${codings[0]?.run_id || DEFAULT_RUN_ID}
**N:** ${analysis.n} dialogues (base=${baseN}, recognition=${recogN})
**Scenarios:** ${scenarios.join(', ')}

## Strategy Distribution by Condition

| Strategy | Base (N=${baseN}) | % | Recognition (N=${recogN}) | % | Diff |
|----------|-----------|------|---------------|------|------|
`);

  STRATEGY_KEYS.forEach((key) => {
    const { base, recognition } = analysis.strategyByCondition[key];
    const basePct = percentOf(base, baseN);
    const recogPct = percentOf(recognition, recogN);
    out.push(`| ${STRATEGIES[key].label} | ${base} | ${basePct}% | ${recognition} | ${recogPct}% | ${signedGap(recogPct, basePct)} |\n`);
  });

  if (analysis.chiSquare.overall) {
    out.push(`\n**Chi-square (strategy × condition):** ${chiLine(analysis.chiSquare.overall, "Cramér's V")}`);
  }

  // ── Architecture table ──
  out.push(`\n## Strategy Distribution by Architecture

| Strategy | Single | Multi | Diff |
|----------|--------|-------|------|
`);
  const singleN = countWhere('architecture', 'single');
  const multiN = countWhere('architecture', 'multi');
  STRATEGY_KEYS.forEach((key) => {
    const { single, multi } = analysis.strategyByArchitecture[key];
    const singlePct = percentOf(single, singleN);
    const multiPct = percentOf(multi, multiN);
    out.push(`| ${STRATEGIES[key].label} | ${single} (${singlePct}%) | ${multi} (${multiPct}%) | ${signedGap(multiPct, singlePct)} |\n`);
  });

  if (analysis.chiSquare.architecture) {
    out.push(`\n**Chi-square (strategy × architecture):** ${chiLine(analysis.chiSquare.architecture, "Cramér's V")}`);
  }

  // ── Per-scenario breakdown (only strategies that occurred) ──
  out.push(`\n## Per-Scenario Breakdown\n`);
  scenarios.forEach((sc) => {
    const inScenario = codings.filter((entry) => entry.scenario_id === sc);
    const scBase = inScenario.filter((entry) => entry.condition === 'base').length;
    const scRecog = inScenario.filter((entry) => entry.condition === 'recognition').length;
    out.push(`\n### ${sc.replace(/_/g, ' ')} (base=${scBase}, recog=${scRecog})\n\n`);
    out.push(`| Strategy | Base | Recognition |\n|----------|------|-------------|\n`);
    STRATEGY_KEYS.forEach((key) => {
      const { base, recognition } = analysis.strategyByScenario[sc][key];
      if (base > 0 || recognition > 0) {
        out.push(`| ${STRATEGIES[key].label} | ${base} | ${recognition} |\n`);
      }
    });
    if (analysis.chiSquare[sc]) {
      out.push(`\n${chiLine(analysis.chiSquare[sc], 'V')}`);
    }
  });

  // ── Exemplar quotes ──
  out.push(`\n## Exemplar Quotes by Strategy\n`);
  STRATEGY_KEYS.forEach((key) => {
    const { base: baseEx, recognition: recogEx } = analysis.exemplars[key];
    if (!baseEx && !recogEx) return;
    out.push(`\n### ${STRATEGIES[key].label}\n`);
    out.push(`*${STRATEGIES[key].description}*\n\n`);
    if (baseEx) {
      out.push(`- **Base** (${baseEx.scenario_id}, id=${baseEx.id}, conf=${baseEx.confidence}): "${baseEx.quote}"\n`);
    }
    if (recogEx) {
      out.push(`- **Recognition** (${recogEx.scenario_id}, id=${recogEx.id}, conf=${recogEx.confidence}): "${recogEx.quote}"\n`);
    }
  });

  // ── Mean coder confidence per condition ──
  out.push(`\n## Coding Confidence Distribution\n\n`);
  const confidences = { base: [], recognition: [] };
  codings.forEach((entry) => {
    confidences[entry.condition].push(entry.coding.confidence || 0);
  });
  ['base', 'recognition'].forEach((cond) => {
    const values = confidences[cond];
    if (values.length === 0) return;
    const mean = values.reduce((a, b) => a + b, 0) / values.length;
    out.push(`- **${cond}**: mean confidence = ${mean.toFixed(2)} (N=${values.length})\n`);
  });

  // ── Domination / recognition markers pooled per condition ──
  out.push(`\n## Marker Analysis\n\n`);
  const domMarkers = { base: [], recognition: [] };
  const recMarkers = { base: [], recognition: [] };
  codings.forEach((entry) => {
    if (entry.coding.domination_markers?.length > 0) {
      domMarkers[entry.condition].push(...entry.coding.domination_markers);
    }
    if (entry.coding.recognition_markers?.length > 0) {
      recMarkers[entry.condition].push(...entry.coding.recognition_markers);
    }
  });

  const markerSummary = (pool) => {
    out.push(`- Base: ${pool.base.length} markers across ${baseN} dialogues\n`);
    out.push(`- Recognition: ${pool.recognition.length} markers across ${recogN} dialogues\n`);
    if (pool.base.length > 0) {
      out.push(`- Base examples: ${quoteList(pool.base)}\n`);
    }
    if (pool.recognition.length > 0) {
      out.push(`- Recognition examples: ${quoteList(pool.recognition)}\n`);
    }
  };

  out.push(`### Domination markers\n`);
  markerSummary(domMarkers);
  out.push(`\n### Recognition markers\n`);
  markerSummary(recMarkers);

  return out.join('');
}
839
+
840
+ // ── CLI ──────────────────────────────────────────────────────────────────
841
+
842
/**
 * Parse command-line options from `process.argv`.
 *
 * Recognised flags: `--model <model>`, `--run-id <id>`, `--per-turn`, `--help`.
 * `--help` prints usage and exits with status 0. A value-taking flag that is
 * not followed by a value (end of arguments, or another `--flag`) prints an
 * error and exits with status 1 instead of silently storing `undefined` or
 * consuming the next flag. Unrecognised arguments are ignored.
 *
 * @returns {{model: string, runId: string, perTurn: boolean}} Options with
 *   defaults `model: 'claude-code'`, `runId: DEFAULT_RUN_ID`, `perTurn: false`.
 */
function parseArgs() {
  const args = process.argv.slice(2);
  const opts = {
    model: 'claude-code',
    runId: DEFAULT_RUN_ID,
    perTurn: false,
  };

  // Return the value following `flag`, or abort with a usage error.
  const valueFor = (flag, index) => {
    const value = args[index];
    if (value === undefined || value.startsWith('--')) {
      console.error(`Missing value for ${flag} (see --help)`);
      process.exit(1);
    }
    return value;
  };

  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case '--model':
        opts.model = valueFor('--model', ++i);
        break;
      case '--run-id':
        opts.runId = valueFor('--run-id', ++i);
        break;
      case '--per-turn':
        opts.perTurn = true;
        break;
      case '--help':
        console.log(`Usage: node scripts/code-impasse-strategies.js [options]

Options:
  --model <model>    Model for coding (default: claude-code)
                       claude-code — Claude Code CLI (subscription, free)
                       haiku       — OpenRouter Haiku
                       sonnet      — OpenRouter Sonnet
  --run-id <id>      Run ID to code (default: ${DEFAULT_RUN_ID})
  --per-turn         Code turns 3 and 5 independently (track strategy evolution)
  --help             Show this help`);
        process.exit(0);
    }
  }
  return opts;
}
870
+
871
+ // ── Main ─────────────────────────────────────────────────────────────────
872
+
873
/**
 * CLI entry point: load dialogues for a run, code each one with the selected
 * model, analyse the codings, and write JSON + Markdown reports under
 * `./exports`.
 *
 * Two modes, chosen by `opts.perTurn`:
 *  - per-turn: codes turns 3 and 5 of every dialogue separately and reports
 *    strategy stability/transitions;
 *  - overall: codes each dialogue once and reports the strategy distribution.
 *
 * Exits with status 1 if `data/evaluations.db` is missing. Returns early
 * (without printing "Done.") when no dialogues are found or nothing could be
 * coded. The database handle is closed on every path, including errors.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const opts = parseArgs();

  const dbPath = path.join(process.cwd(), 'data', 'evaluations.db');
  if (!fs.existsSync(dbPath)) {
    console.error('Database not found:', dbPath);
    process.exit(1);
  }

  const db = new Database(dbPath);

  try {
    console.log('='.repeat(70));
    console.log(opts.perTurn
      ? 'PER-TURN STRATEGY CODING (Turns 3 & 5)'
      : 'DIALECTICAL IMPASSE RESOLUTION STRATEGY CODING');
    console.log('='.repeat(70));
    console.log(`Model: ${opts.model} | Run ID: ${opts.runId}`);

    // Load dialogues
    const dialogues = loadDialogues(db, opts.runId);
    console.log(`\nLoaded ${dialogues.length} dialogues`);
    const baseN = dialogues.filter((d) => d.condition === 'base').length;
    const recogN = dialogues.filter((d) => d.condition === 'recognition').length;
    console.log(` Base: ${baseN}, Recognition: ${recogN}`);
    console.log(` Scenarios: ${[...new Set(dialogues.map((d) => d.scenario_id))].join(', ')}`);

    if (dialogues.length === 0) {
      console.error('No dialogues found.');
      return;
    }

    // Ensure exports directory (recursive mkdir is a no-op if it already exists)
    const exportsDir = path.join(process.cwd(), 'exports');
    fs.mkdirSync(exportsDir, { recursive: true });

    const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);

    const codings = [];
    let errors = 0;

    // Code one dialogue (or one turn of it). A model/parse failure or an
    // unknown strategy is logged and counted in `errors`; a success is
    // appended to `codings`. `extra` fields are placed before `coding`.
    const codeOne = async (d, buildPrompt, extra = {}) => {
      try {
        const content = await callModel(buildPrompt(), opts.model);
        const parsed = parseJsonResponse(content);

        if (!STRATEGY_KEYS.includes(parsed.primary_strategy)) {
          console.warn(` WARN: invalid strategy "${parsed.primary_strategy}", skipping`);
          errors++;
          return;
        }

        codings.push({
          id: d.id,
          run_id: opts.runId,
          scenario_id: d.scenario_id,
          profile_name: d.profile_name,
          condition: d.condition,
          architecture: d.architecture,
          overall_score: d.overall_score,
          ...extra,
          coding: parsed,
        });

        console.log(` → ${parsed.primary_strategy} (conf=${parsed.confidence})`);
      } catch (err) {
        errors++;
        console.error(` ERROR: ${err.message}`);
      }
    };

    const startTime = Date.now();

    if (opts.perTurn) {
      // ── Per-Turn Mode ───────────────────────────────────────────────
      const TARGET_TURNS = [3, 5];
      const MIN_SUGGESTIONS = 6;
      const tooShort = (d) => d.suggestions.length < MIN_SUGGESTIONS;

      // Only dialogues that will actually be coded count towards the progress
      // total; skipped ones previously inflated the denominator.
      const totalCalls = dialogues.filter((d) => !tooShort(d)).length * TARGET_TURNS.length;
      let callNum = 0;

      for (const d of dialogues) {
        if (tooShort(d)) {
          console.warn(` SKIP ${d.id}: only ${d.suggestions.length} suggestions (need ${MIN_SUGGESTIONS})`);
          continue;
        }

        for (const turn of TARGET_TURNS) {
          callNum++;
          process.stdout.write(` [${callNum}/${totalCalls}] Turn ${turn}: ${d.scenario_id} / ${d.profile_name}...`);
          await codeOne(d, () => buildPerTurnCodingPrompt(d, turn), { turn });
        }
      }

      const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
      console.log(`\nCoding complete: ${codings.length} coded, ${errors} errors, ${elapsed}s`);

      if (codings.length === 0) {
        console.error('No successful codings.');
        return;
      }

      const analysis = analyzePerTurnResults(codings);

      const jsonPath = path.join(exportsDir, `impasse-per-turn-coding-${timestamp}.json`);
      fs.writeFileSync(jsonPath, JSON.stringify({
        generated: new Date().toISOString(),
        model: opts.model,
        runId: opts.runId,
        mode: 'per-turn',
        targetTurns: TARGET_TURNS,
        n: codings.length,
        errors,
        codings,
        analysis,
      }, null, 2));
      console.log(`\nJSON: ${jsonPath}`);

      const mdReport = generatePerTurnReport(codings, analysis);
      const mdPath = path.join(exportsDir, `impasse-per-turn-coding-${timestamp}.md`);
      fs.writeFileSync(mdPath, mdReport);
      console.log(`Markdown: ${mdPath}`);

      // Print summary
      console.log('\n' + '─'.repeat(70));
      console.log('PER-TURN STRATEGY SUMMARY');
      console.log('─'.repeat(70));

      for (const turn of TARGET_TURNS) {
        console.log(`\n Turn ${turn}:`);
        console.log(` ${'Strategy'.padEnd(25)} ${'Base'.padEnd(8)} Recog`);
        for (const s of STRATEGY_KEYS) {
          const b = analysis.turnStrategies[turn][s].base;
          const r = analysis.turnStrategies[turn][s].recognition;
          if (b > 0 || r > 0) {
            console.log(` ${STRATEGIES[s].label.padEnd(23)} ${String(b).padEnd(8)} ${r}`);
          }
        }
      }

      console.log(`\n Strategy stability (turn 3 → turn 5):`);
      for (const cond of ['base', 'recognition']) {
        const s = analysis.stability[cond];
        const total = s.same + s.changed;
        const rate = total > 0 ? (s.same / total * 100).toFixed(0) : 'N/A';
        console.log(` ${cond}: ${s.same} same, ${s.changed} changed (${rate}% stable)`);
      }

      console.log(`\n Transition patterns:`);
      for (const cond of ['base', 'recognition']) {
        console.log(` ${cond}:`);
        const trans = analysis.transitionsByCondition[cond];
        const sorted = Object.entries(trans).sort((a, b) => b[1] - a[1]);
        for (const [key, count] of sorted) {
          console.log(` ${key}: ${count}`);
        }
      }
    } else {
      // ── Overall Mode (original) ─────────────────────────────────────
      for (let i = 0; i < dialogues.length; i++) {
        const d = dialogues[i];
        process.stdout.write(` [${i + 1}/${dialogues.length}] Coding ${d.scenario_id} / ${d.profile_name}...`);
        await codeOne(d, () => buildCodingPrompt(d));
      }

      const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
      console.log(`\nCoding complete: ${codings.length} coded, ${errors} errors, ${elapsed}s`);

      if (codings.length === 0) {
        console.error('No successful codings.');
        return;
      }

      // Analyze
      const analysis = analyzeResults(codings);

      // Write outputs
      const jsonPath = path.join(exportsDir, `impasse-strategy-coding-${timestamp}.json`);
      fs.writeFileSync(jsonPath, JSON.stringify({
        generated: new Date().toISOString(),
        model: opts.model,
        runId: opts.runId,
        n: codings.length,
        errors,
        codings,
        analysis,
      }, null, 2));
      console.log(`\nJSON: ${jsonPath}`);

      const mdReport = generateReport(codings, analysis);
      const mdPath = path.join(exportsDir, `impasse-strategy-coding-${timestamp}.md`);
      fs.writeFileSync(mdPath, mdReport);
      console.log(`Markdown: ${mdPath}`);

      // Percentages are taken over successfully coded dialogues, the same
      // denominators the Markdown report uses, so the two agree even when
      // some codings failed.
      const codedBaseN = codings.filter((c) => c.condition === 'base').length;
      const codedRecogN = codings.filter((c) => c.condition === 'recognition').length;

      // Print summary
      console.log('\n' + '─'.repeat(70));
      console.log('STRATEGY DISTRIBUTION SUMMARY');
      console.log('─'.repeat(70));
      console.log(`${'Strategy'.padEnd(25)} ${'Base'.padEnd(12)} ${'Recog'.padEnd(12)} Diff`);
      console.log('─'.repeat(60));
      for (const s of STRATEGY_KEYS) {
        const b = analysis.strategyByCondition[s].base;
        const r = analysis.strategyByCondition[s].recognition;
        const bPct = codedBaseN > 0 ? (b / codedBaseN * 100).toFixed(0) : '0';
        const rPct = codedRecogN > 0 ? (r / codedRecogN * 100).toFixed(0) : '0';
        const diff = Number.parseInt(rPct, 10) - Number.parseInt(bPct, 10);
        console.log(` ${STRATEGIES[s].label.padEnd(23)} ${(b + ' (' + bPct + '%)').padEnd(12)} ${(r + ' (' + rPct + '%)').padEnd(12)} ${diff > 0 ? '+' : ''}${diff}%`);
      }

      if (analysis.chiSquare.overall) {
        const cs = analysis.chiSquare.overall;
        const pStr = cs.p < .001 ? 'p < .001' : `p = ${cs.p.toFixed(3)}`;
        console.log(`\n χ²(${cs.df}) = ${cs.chi2.toFixed(2)}, ${pStr}, V = ${cs.cramersV.toFixed(3)}`);
      }
    }
  } finally {
    // Release the database handle on success, early return, and error alike.
    db.close();
  }

  console.log('\nDone.');
}
1128
+
1129
// Script entry point: run the CLI; any unhandled failure is reported and
// turned into a non-zero exit status.
main().catch((fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});