@machinespirits/eval 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +161 -0
- package/config/eval-settings.yaml +18 -0
- package/config/evaluation-rubric-learner.yaml +277 -0
- package/config/evaluation-rubric.yaml +613 -0
- package/config/interaction-eval-scenarios.yaml +93 -50
- package/config/learner-agents.yaml +124 -193
- package/config/machinespirits-eval.code-workspace +11 -0
- package/config/providers.yaml +60 -0
- package/config/suggestion-scenarios.yaml +1399 -0
- package/config/tutor-agents.yaml +716 -0
- package/docs/EVALUATION-VARIABLES.md +589 -0
- package/docs/REPLICATION-PLAN.md +577 -0
- package/index.js +15 -6
- package/package.json +16 -22
- package/routes/evalRoutes.js +88 -36
- package/scripts/analyze-judge-reliability.js +401 -0
- package/scripts/analyze-run.js +97 -0
- package/scripts/analyze-run.mjs +282 -0
- package/scripts/analyze-validation-failures.js +141 -0
- package/scripts/check-run.mjs +17 -0
- package/scripts/code-impasse-strategies.js +1132 -0
- package/scripts/compare-runs.js +44 -0
- package/scripts/compare-suggestions.js +80 -0
- package/scripts/compare-transformation.js +116 -0
- package/scripts/dig-into-run.js +158 -0
- package/scripts/eval-cli.js +2626 -0
- package/scripts/generate-paper-figures.py +452 -0
- package/scripts/qualitative-analysis-ai.js +1313 -0
- package/scripts/qualitative-analysis.js +688 -0
- package/scripts/seed-db.js +87 -0
- package/scripts/show-failed-suggestions.js +64 -0
- package/scripts/validate-content.js +192 -0
- package/server.js +3 -2
- package/services/__tests__/evalConfigLoader.test.js +338 -0
- package/services/anovaStats.js +499 -0
- package/services/contentResolver.js +407 -0
- package/services/dialogueTraceAnalyzer.js +454 -0
- package/services/evalConfigLoader.js +625 -0
- package/services/evaluationRunner.js +2171 -270
- package/services/evaluationStore.js +564 -29
- package/services/learnerConfigLoader.js +75 -5
- package/services/learnerRubricEvaluator.js +284 -0
- package/services/learnerTutorInteractionEngine.js +375 -0
- package/services/processUtils.js +18 -0
- package/services/progressLogger.js +98 -0
- package/services/promptRecommendationService.js +31 -26
- package/services/promptRewriter.js +427 -0
- package/services/rubricEvaluator.js +543 -70
- package/services/streamingReporter.js +104 -0
- package/services/turnComparisonAnalyzer.js +494 -0
- package/components/MobileEvalDashboard.tsx +0 -267
- package/components/comparison/DeltaAnalysisTable.tsx +0 -137
- package/components/comparison/ProfileComparisonCard.tsx +0 -176
- package/components/comparison/RecognitionABMode.tsx +0 -385
- package/components/comparison/RecognitionMetricsPanel.tsx +0 -135
- package/components/comparison/WinnerIndicator.tsx +0 -64
- package/components/comparison/index.ts +0 -5
- package/components/mobile/BottomSheet.tsx +0 -233
- package/components/mobile/DimensionBreakdown.tsx +0 -210
- package/components/mobile/DocsView.tsx +0 -363
- package/components/mobile/LogsView.tsx +0 -481
- package/components/mobile/PsychodynamicQuadrant.tsx +0 -261
- package/components/mobile/QuickTestView.tsx +0 -1098
- package/components/mobile/RecognitionTypeChart.tsx +0 -124
- package/components/mobile/RecognitionView.tsx +0 -809
- package/components/mobile/RunDetailView.tsx +0 -261
- package/components/mobile/RunHistoryView.tsx +0 -367
- package/components/mobile/ScoreRadial.tsx +0 -211
- package/components/mobile/StreamingLogPanel.tsx +0 -230
- package/components/mobile/SynthesisStrategyChart.tsx +0 -140
- package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +0 -52
- package/docs/research/ABLATION-MODEL-SELECTION.md +0 -53
- package/docs/research/ADVANCED-EVAL-ANALYSIS.md +0 -60
- package/docs/research/ANOVA-RESULTS-2026-01-14.md +0 -257
- package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +0 -586
- package/docs/research/COST-ANALYSIS.md +0 -56
- package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +0 -340
- package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +0 -291
- package/docs/research/EVAL-SYSTEM-ANALYSIS.md +0 -306
- package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +0 -301
- package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +0 -1988
- package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +0 -282
- package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +0 -147
- package/docs/research/PAPER-EXTENSION-DYADIC.md +0 -204
- package/docs/research/PAPER-UNIFIED.md +0 -659
- package/docs/research/PAPER-UNIFIED.pdf +0 -0
- package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +0 -356
- package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +0 -419
- package/docs/research/apa.csl +0 -2133
- package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +0 -1637
- package/docs/research/archive/paper-multiagent-tutor.tex +0 -978
- package/docs/research/paper-draft/full-paper.md +0 -136
- package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
- package/docs/research/paper-draft/references.bib +0 -515
- package/docs/research/transcript-baseline.md +0 -139
- package/docs/research/transcript-recognition-multiagent.md +0 -187
- package/hooks/useEvalData.ts +0 -625
- package/server-init.js +0 -45
- package/services/benchmarkService.js +0 -1892
- package/types.ts +0 -165
- package/utils/haptics.ts +0 -45
|
@@ -0,0 +1,499 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ANOVA Statistics Module
|
|
3
|
+
*
|
|
4
|
+
* Three-way ANOVA for 2×2×2 factorial designs.
|
|
5
|
+
* Extracted from benchmarkService to be reusable by the main evaluation pipeline.
|
|
6
|
+
*
|
|
7
|
+
* Factors:
|
|
8
|
+
* A: Recognition (standard vs recognition-enhanced prompts)
|
|
9
|
+
* B: Multi-agent tutor (single vs ego+superego dialogue)
|
|
10
|
+
* C: Multi-agent learner (unified vs ego_superego)
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
// ---- F-distribution p-value via regularized incomplete beta function ----
|
|
14
|
+
|
|
15
|
+
/**
 * Natural logarithm of the gamma function, ln(Gamma(z)).
 *
 * Uses a Lanczos series (g = 7, nine coefficients) for z >= 0.5 and the
 * reflection identity Gamma(z) * Gamma(1 - z) = pi / sin(pi * z) for
 * 0 < z < 0.5.
 *
 * @param {number} z - Argument of the gamma function
 * @returns {number} ln(Gamma(z)); Infinity for z <= 0
 */
function lnGamma(z) {
  if (z <= 0) return Infinity;

  if (z < 0.5) {
    // Reflect into the region where the Lanczos series is used directly.
    const reflected = Math.log(Math.PI / Math.sin(Math.PI * z));
    return reflected - lnGamma(1 - z);
  }

  const LANCZOS_G = 7;
  const LANCZOS_COEFFS = [
    0.99999999999980993,
    676.5203681218851,
    -1259.1392167224028,
    771.32342877765313,
    -176.61502916214059,
    12.507343278686905,
    -0.13857109526572012,
    9.9843695780195716e-6,
    1.5056327351493116e-7,
  ];

  // The series is written in terms of z - 1.
  const shifted = z - 1;
  const [leading, ...tail] = LANCZOS_COEFFS;
  const series = tail.reduce(
    (acc, coef, idx) => acc + coef / (shifted + (idx + 1)),
    leading,
  );

  const t = shifted + LANCZOS_G + 0.5;
  return 0.5 * Math.log(2 * Math.PI) + (shifted + 0.5) * Math.log(t) - t + Math.log(series);
}
|
|
49
|
+
|
|
50
|
+
/**
 * Regularized incomplete beta function I_x(a, b).
 *
 * Evaluated as prefactor * continued fraction, where the continued fraction
 * is the Numerical Recipes "betacf" expansion computed with the modified
 * Lentz method:
 *   d_{2m}   =  m (b - m) x / ((a + 2m - 1)(a + 2m))
 *   d_{2m+1} = -(a + m)(a + b + m) x / ((a + 2m)(a + 2m + 1))
 *
 * For x above (a + 1) / (a + b + 2) the symmetry
 * I_x(a, b) = 1 - I_{1-x}(b, a) is applied first.
 *
 * @param {number} x - Upper integration limit
 * @param {number} a - First shape parameter
 * @param {number} b - Second shape parameter
 * @returns {number} I_x(a, b); 0 for x <= 0 and 1 for x >= 1
 */
function regularizedBeta(x, a, b) {
  if (x <= 0) return 0;
  if (x >= 1) return 1;

  // Switch to the mirrored problem where the continued fraction converges faster.
  const switchPoint = (a + 1) / (a + b + 2);
  if (x > switchPoint) {
    return 1 - regularizedBeta(1 - x, b, a);
  }

  // Front factor x^a (1-x)^b / (a * B(a, b)), assembled in log space.
  const logFront = a * Math.log(x) + b * Math.log(1 - x)
    - Math.log(a) - lnGamma(a) - lnGamma(b) + lnGamma(a + b);
  const front = Math.exp(logFront);

  const MAX_STEPS = 200;
  const TOLERANCE = 3e-14;
  const TINY = 1e-30;

  // Lentz's method must never divide by (near) zero.
  const awayFromZero = (value) => (Math.abs(value) < TINY ? TINY : value);

  const sumAB = a + b;
  const aPlusOne = a + 1;
  const aMinusOne = a - 1;

  let numer = 1;
  let denom = 1 / awayFromZero(1 - sumAB * x / aPlusOne);
  let fraction = denom;

  // Apply one continued-fraction coefficient and return the multiplicative update.
  const applyCoefficient = (coef) => {
    denom = 1 / awayFromZero(1 + coef * denom);
    numer = awayFromZero(1 + coef / numer);
    return denom * numer;
  };

  let step = 1;
  while (step <= MAX_STEPS) {
    const twoM = 2 * step;

    const evenCoef = step * (b - step) * x / ((aMinusOne + twoM) * (a + twoM));
    fraction *= applyCoefficient(evenCoef);

    const oddCoef = -(a + step) * (sumAB + step) * x / ((a + twoM) * (aPlusOne + twoM));
    const update = applyCoefficient(oddCoef);
    fraction *= update;

    // Converged once the latest update no longer changes the product.
    if (Math.abs(update - 1) < TOLERANCE) break;
    step += 1;
  }

  return front * fraction;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Upper-tail p-value of the F-distribution: P(F' > F | d1, d2).
 *
 * Computed directly as I_{d2 / (d1*F + d2)}(d2/2, d1/2) rather than as
 * 1 - I_{d1*F / (d1*F + d2)}(d1/2, d2/2). The two are mathematically equal,
 * but the direct form does not cancel to exactly 0 for large F.
 *
 * Degenerate inputs:
 *   - any argument that is NaN or not a number -> 1 (an undefined statistic
 *     is never reported as evidence against the null hypothesis)
 *   - F <= 0, d1 <= 0 or d2 <= 0 -> 1
 *   - F = +Infinity -> 0
 *
 * @param {number} F - The F-statistic
 * @param {number} d1 - Numerator degrees of freedom
 * @param {number} d2 - Denominator degrees of freedom
 * @returns {number} p-value (upper tail probability)
 */
function fDistPValue(F, d1, d2) {
  const isUsableNumber = (value) => typeof value === 'number' && !Number.isNaN(value);

  // NaN compares false against everything, so it must be rejected explicitly;
  // otherwise it would fall through to the "infinite F" branch and yield p = 0.
  if (!isUsableNumber(F) || !isUsableNumber(d1) || !isUsableNumber(d2)) return 1;
  if (F <= 0 || d1 <= 0 || d2 <= 0) return 1;
  if (!Number.isFinite(F)) return 0;

  const upperTailX = d2 / (d1 * F + d2);
  return regularizedBeta(upperTailX, d2 / 2, d1 / 2);
}
|
|
129
|
+
|
|
130
|
+
/**
 * Run a three-way ANOVA on 2x2x2 factorial cell data.
 *
 * Sums of squares are derived from pooled marginal, two-way and cell means;
 * the three-way term is the remainder of the between-cells sum of squares
 * (clamped at 0). NOTE(review): this decomposition is the textbook one for a
 * balanced design; with unequal or missing cells the effect terms are not
 * orthogonal, so treat those results as approximate.
 *
 * Input handling:
 *   - only keys of the form "r{0|1}_t{0|1}_l{0|1}" are used
 *   - values that are not arrays are ignored
 *   - scores that are not finite numbers are dropped before any computation
 *
 * Degenerate cases:
 *   - error df is N minus the number of non-empty cells (N - 8 for a full design)
 *   - when error df <= 0 the error mean square falls back to 1 so F stays
 *     finite (unchanged legacy behavior)
 *   - when the error mean square is 0, F is Infinity for a non-zero effect and
 *     0 for a zero effect (never NaN)
 *
 * @param {Object} data - Map of cellKey → [scores]
 *   e.g. { "r0_t0_l0": [55, 62, 58], "r0_t0_l1": [60, 65, 63], ... }
 * @returns {Object} Either { error: string } when no usable data exists, or
 *   { grandMean, N, marginalMeans, mainEffects, interactions, error, total }
 *   where each effect is { SS, df, MS, F, p, etaSq }
 */
export function runThreeWayANOVA(data) {
  const LEVELS = [0, 1];
  const CELL_KEY_PATTERN = /^r[01]_t[01]_l[01]$/;

  const isScore = (value) => typeof value === 'number' && Number.isFinite(value);
  const sum = (values) => values.reduce((acc, value) => acc + value, 0);

  // Keep only recognised cells, and only usable numeric observations in them.
  const cells = {};
  for (const [key, scores] of Object.entries(data || {})) {
    if (CELL_KEY_PATTERN.test(key) && Array.isArray(scores)) {
      cells[key] = scores.filter(isScore);
    }
  }

  if (Object.keys(cells).length === 0) {
    return { error: 'No data available for ANOVA. Data must use cell keys like "r0_t0_l0".' };
  }

  const allData = Object.values(cells).flat();
  const N = allData.length;
  if (N === 0) {
    return { error: 'No data available for ANOVA' };
  }

  const grandMean = sum(allData) / N;

  // An empty group contributes nothing, so its mean is pinned to the grand mean.
  const meanOf = (values) => (values.length > 0 ? sum(values) / values.length : grandMean);
  const getByFactors = (r, t, l) => cells[`r${r}_t${t}_l${l}`] || [];

  // Pool the observations of every cell whose factor levels satisfy `matches`.
  const pool = (matches) => {
    let pooled = [];
    for (const r of LEVELS) {
      for (const t of LEVELS) {
        for (const l of LEVELS) {
          if (matches(r, t, l)) pooled = pooled.concat(getByFactors(r, t, l));
        }
      }
    }
    return pooled;
  };

  const summarize = (matches) => {
    const pooled = pool(matches);
    return { n: pooled.length, mean: meanOf(pooled) };
  };

  // Marginal summaries, indexed by level (0 or 1).
  const recognition = LEVELS.map((level) => summarize((r) => r === level));
  const tutor = LEVELS.map((level) => summarize((r, t) => t === level));
  const learner = LEVELS.map((level) => summarize((r, t, l) => l === level));

  // Sums of squares
  const SST = allData.reduce((acc, x) => acc + (x - grandMean) ** 2, 0);

  const mainEffectSS = (levels) =>
    levels[0].n * (levels[0].mean - grandMean) ** 2 +
    levels[1].n * (levels[1].mean - grandMean) ** 2;

  const SS_R = mainEffectSS(recognition);
  const SS_T = mainEffectSS(tutor);
  const SS_L = mainEffectSS(learner);

  // Two-way interaction: deviation of each pooled two-factor mean from the
  // additive prediction (marginal A + marginal B - grand mean).
  const twoWaySS = (factorA, factorB, matcherFor) => {
    let ss = 0;
    for (const i of LEVELS) {
      for (const j of LEVELS) {
        const pooled = pool(matcherFor(i, j));
        const expected = factorA[i].mean + factorB[j].mean - grandMean;
        ss += pooled.length * (meanOf(pooled) - expected) ** 2;
      }
    }
    return ss;
  };

  const SS_RT = twoWaySS(recognition, tutor, (i, j) => (r, t) => r === i && t === j);
  const SS_RL = twoWaySS(recognition, learner, (i, j) => (r, t, l) => r === i && l === j);
  const SS_TL = twoWaySS(tutor, learner, (i, j) => (r, t, l) => t === i && l === j);

  // Between-cells and within-cells (error) sums of squares in a single pass.
  let SS_cells = 0;
  let SS_E = 0;
  let nonEmptyCells = 0;
  for (const r of LEVELS) {
    for (const t of LEVELS) {
      for (const l of LEVELS) {
        const cellData = getByFactors(r, t, l);
        if (cellData.length === 0) continue;
        nonEmptyCells += 1;
        const cellMean = sum(cellData) / cellData.length;
        SS_cells += cellData.length * (cellMean - grandMean) ** 2;
        SS_E += cellData.reduce((acc, x) => acc + (x - cellMean) ** 2, 0);
      }
    }
  }

  // Three-way term is whatever between-cells variation the lower-order terms
  // leave unexplained.
  const SS_RTL = Math.max(0, SS_cells - SS_R - SS_T - SS_L - SS_RT - SS_RL - SS_TL);

  // Degrees of freedom
  const EFFECT_DF = 1;
  // Each non-empty cell consumes one df for its own mean.
  const df_E = N - nonEmptyCells;
  const df_T_total = N - 1;

  // Legacy fallback: without error df there is no variance estimate, so 1 is
  // used purely to keep the F ratios finite.
  const MS_E = df_E > 0 ? SS_E / df_E : 1;

  // 0/0 would give NaN; an effect with no variation has F = 0 by convention here.
  const fRatio = (meanSquare) => {
    if (MS_E > 0) return meanSquare / MS_E;
    return meanSquare > 0 ? Infinity : 0;
  };

  const buildEffect = (SS) => {
    const MS = SS / EFFECT_DF;
    const F = fRatio(MS);
    return {
      SS,
      df: EFFECT_DF,
      MS,
      F,
      p: fDistPValue(F, EFFECT_DF, df_E),
      etaSq: SST > 0 ? SS / SST : 0,
    };
  };

  return {
    grandMean,
    N,
    marginalMeans: {
      recognition: { standard: recognition[0].mean, recognition: recognition[1].mean },
      tutor: { single: tutor[0].mean, multi: tutor[1].mean },
      learner: { unified: learner[0].mean, ego_superego: learner[1].mean },
    },
    mainEffects: {
      recognition: buildEffect(SS_R),
      tutor: buildEffect(SS_T),
      learner: buildEffect(SS_L),
    },
    interactions: {
      recognition_x_tutor: buildEffect(SS_RT),
      recognition_x_learner: buildEffect(SS_RL),
      tutor_x_learner: buildEffect(SS_TL),
      three_way: buildEffect(SS_RTL),
    },
    error: { SS: SS_E, df: df_E, MS: MS_E },
    total: { SS: SST, df: df_T_total },
  };
}
|
|
356
|
+
|
|
357
|
+
/**
 * Build the ANOVA cell key for a profile's factor flags.
 *
 * Each flag is reduced to 1 when truthy and 0 otherwise.
 *
 * @param {Object} factors - { recognition: bool, multi_agent_tutor: bool, multi_agent_learner: bool }
 * @returns {string} Cell key like "r0_t1_l0"
 */
export function factorsToCellKey(factors) {
  const toBit = (flag) => (flag ? 1 : 0);
  const {
    recognition,
    multi_agent_tutor: multiAgentTutor,
    multi_agent_learner: multiAgentLearner,
  } = factors;

  return [
    `r${toBit(recognition)}`,
    `t${toBit(multiAgentTutor)}`,
    `l${toBit(multiAgentLearner)}`,
  ].join('_');
}
|
|
369
|
+
|
|
370
|
+
/**
 * Format ANOVA results as a plain-text report with four sections: title and
 * grand mean, marginal means, the ANOVA table, and a short interpretation.
 *
 * `anovaResults.error` is overloaded by runThreeWayANOVA(): a string means the
 * analysis failed (a one-line error report is returned), an object is the
 * error (within-cells) row of the table.
 *
 * p-values are printed without a leading zero ("= .031"), as "< .001" below
 * 0.001, and as "= 1.000" when they round to one.
 *
 * @param {Object} anovaResults - Output of runThreeWayANOVA()
 * @param {Object} [options] - Formatting options
 * @param {string} [options.scoreLabel] - Label for the score type (e.g. "Base Score", "Recognition Score")
 * @returns {string} Formatted report
 */
export function formatANOVAReport(anovaResults, options = {}) {
  const { scoreLabel } = options;

  if (typeof anovaResults.error === 'string') {
    return `ANOVA Error: ${anovaResults.error}`;
  }

  const HEAVY_RULE = '='.repeat(70);
  const LIGHT_RULE = '-'.repeat(70);
  const TABLE_RULE = ' ' + '-'.repeat(66);
  const ALPHA = 0.05;

  const lines = [];
  const title = scoreLabel
    ? `THREE-WAY ANOVA: ${scoreLabel.toUpperCase()}`
    : 'THREE-WAY ANOVA: 2x2x2 FACTORIAL ANALYSIS';

  lines.push('');
  lines.push(HEAVY_RULE);
  lines.push(` ${title}`);
  lines.push(HEAVY_RULE);
  lines.push(` Grand Mean: ${anovaResults.grandMean.toFixed(2)} | N = ${anovaResults.N}`);
  lines.push('');

  // Marginal means
  const mm = anovaResults.marginalMeans;
  lines.push(LIGHT_RULE);
  lines.push(' MARGINAL MEANS');
  lines.push(LIGHT_RULE);
  lines.push(` Recognition: Standard = ${mm.recognition.standard.toFixed(2)}, Recognition = ${mm.recognition.recognition.toFixed(2)}`);
  lines.push(` Tutor: Single = ${mm.tutor.single.toFixed(2)}, Multi-Agent = ${mm.tutor.multi.toFixed(2)}`);
  lines.push(` Learner: Unified = ${mm.learner.unified.toFixed(2)}, Ego/Superego = ${mm.learner.ego_superego.toFixed(2)}`);
  lines.push('');

  // ANOVA table. The header is padded with the same widths as the data rows
  // so the column titles sit above their values.
  lines.push(LIGHT_RULE);
  lines.push(' ANOVA TABLE');
  lines.push(LIGHT_RULE);
  lines.push(
    ` ${'Source'.padEnd(22)} ${'SS'.padStart(8)} ${'df'.padStart(6)} ${'MS'.padStart(8)} ${'F'.padStart(8)} ${'p'.padStart(8)} ${'eta2'.padStart(6)}`,
  );
  lines.push(TABLE_RULE);

  const formatRow = (name, data) => {
    const ss = data.SS.toFixed(2).padStart(8);
    const df = data.df.toString().padStart(6);
    const ms = data.MS.toFixed(2).padStart(8);
    const f = data.F.toFixed(3).padStart(8);
    const p = data.p < 0.001 ? '< .001' : data.p.toFixed(3);
    const eta = data.etaSq.toFixed(3).padStart(6);
    const sig = data.p < ALPHA ? '***' : (data.p < 0.1 ? '*' : '');
    return ` ${name.padEnd(22)} ${ss} ${df} ${ms} ${f} ${p.padStart(8)} ${eta} ${sig}`;
  };

  const me = anovaResults.mainEffects;
  const ia = anovaResults.interactions;

  lines.push(formatRow('Recognition (A)', me.recognition));
  lines.push(formatRow('Tutor Architecture (B)', me.tutor));
  lines.push(formatRow('Learner Arch. (C)', me.learner));
  lines.push(TABLE_RULE);
  lines.push(formatRow('A x B', ia.recognition_x_tutor));
  lines.push(formatRow('A x C', ia.recognition_x_learner));
  lines.push(formatRow('B x C', ia.tutor_x_learner));
  lines.push(formatRow('A x B x C', ia.three_way));
  lines.push(TABLE_RULE);

  const err = anovaResults.error;
  lines.push(` ${'Error'.padEnd(22)} ${err.SS.toFixed(2).padStart(8)} ${err.df.toString().padStart(6)} ${err.MS.toFixed(2).padStart(8)}`);
  lines.push('');
  lines.push(' Significance: *** p < .05, * p < .10');
  lines.push('');

  // Interpretation
  lines.push(LIGHT_RULE);
  lines.push(' INTERPRETATION');
  lines.push(LIGHT_RULE);

  // Drop the leading zero only when there is one: "0.031" -> ".031", but
  // "1.000" must stay "1.000" rather than being truncated to ".000".
  const formatP = (p) => {
    if (p < 0.001) return '< .001';
    const rounded = p.toFixed(3);
    return rounded.startsWith('0.') ? `= ${rounded.slice(1)}` : `= ${rounded}`;
  };

  const describe = (effect) => `F = ${effect.F.toFixed(2)}, p ${formatP(effect.p)}`;

  const mainEffectNotes = [
    {
      label: 'Recognition prompts',
      stats: me.recognition,
      delta: mm.recognition.recognition - mm.recognition.standard,
    },
    {
      label: 'Multi-agent tutor',
      stats: me.tutor,
      delta: mm.tutor.multi - mm.tutor.single,
    },
    {
      label: 'Multi-agent learner',
      stats: me.learner,
      delta: mm.learner.ego_superego - mm.learner.unified,
    },
  ];

  for (const { label, stats, delta } of mainEffectNotes) {
    if (stats.p < ALPHA) {
      lines.push(` * ${label}: SIGNIFICANT (${describe(stats)})`);
      lines.push(` Effect: ${delta >= 0 ? '+' : ''}${delta.toFixed(2)} points, eta2 = ${stats.etaSq.toFixed(3)}`);
    } else {
      lines.push(` - ${label}: not significant (${describe(stats)})`);
    }
  }

  // Interactions are only mentioned when significant.
  lines.push('');
  const interactionNotes = [
    { key: 'recognition_x_tutor', label: 'Recognition x Tutor' },
    { key: 'recognition_x_learner', label: 'Recognition x Learner' },
    { key: 'tutor_x_learner', label: 'Tutor x Learner' },
    { key: 'three_way', label: 'Three-way' },
  ];
  for (const { key, label } of interactionNotes) {
    if (ia[key].p < ALPHA) {
      lines.push(` * ${label} interaction: SIGNIFICANT (${describe(ia[key])})`);
    }
  }

  lines.push('');
  lines.push(HEAVY_RULE);

  return lines.join('\n');
}
|
|
494
|
+
|
|
495
|
+
// Default export: the module's public functions bundled into one object,
// for callers that import the module as a whole.
const anovaStats = {
  runThreeWayANOVA,
  factorsToCellKey,
  formatANOVAReport,
};

export default anovaStats;
|