@kernel.chat/kbot 3.41.0 → 3.43.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/agent-teams.d.ts +1 -1
- package/dist/agent-teams.d.ts.map +1 -1
- package/dist/agent-teams.js +36 -3
- package/dist/agent-teams.js.map +1 -1
- package/dist/agents/specialists.d.ts.map +1 -1
- package/dist/agents/specialists.js +20 -0
- package/dist/agents/specialists.js.map +1 -1
- package/dist/auth.d.ts +5 -1
- package/dist/auth.d.ts.map +1 -1
- package/dist/auth.js +1 -1
- package/dist/auth.js.map +1 -1
- package/dist/channels/kbot-channel.js +8 -31
- package/dist/channels/kbot-channel.js.map +1 -1
- package/dist/cli.js +44 -11
- package/dist/cli.js.map +1 -1
- package/dist/completions.d.ts.map +1 -1
- package/dist/completions.js +7 -0
- package/dist/completions.js.map +1 -1
- package/dist/digest.js +1 -1
- package/dist/digest.js.map +1 -1
- package/dist/doctor.d.ts.map +1 -1
- package/dist/doctor.js +132 -92
- package/dist/doctor.js.map +1 -1
- package/dist/doctor.test.d.ts +2 -0
- package/dist/doctor.test.d.ts.map +1 -0
- package/dist/doctor.test.js +432 -0
- package/dist/doctor.test.js.map +1 -0
- package/dist/email-service.d.ts.map +1 -1
- package/dist/email-service.js +1 -2
- package/dist/email-service.js.map +1 -1
- package/dist/episodic-memory.d.ts.map +1 -1
- package/dist/episodic-memory.js +14 -0
- package/dist/episodic-memory.js.map +1 -1
- package/dist/learned-router.d.ts.map +1 -1
- package/dist/learned-router.js +29 -0
- package/dist/learned-router.js.map +1 -1
- package/dist/tools/email.d.ts.map +1 -1
- package/dist/tools/email.js +2 -3
- package/dist/tools/email.js.map +1 -1
- package/dist/tools/hypothesis-engine.d.ts +2 -0
- package/dist/tools/hypothesis-engine.d.ts.map +1 -0
- package/dist/tools/hypothesis-engine.js +2276 -0
- package/dist/tools/hypothesis-engine.js.map +1 -0
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +11 -1
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/lab-bio.d.ts +2 -0
- package/dist/tools/lab-bio.d.ts.map +1 -0
- package/dist/tools/lab-bio.js +1392 -0
- package/dist/tools/lab-bio.js.map +1 -0
- package/dist/tools/lab-chem.d.ts +2 -0
- package/dist/tools/lab-chem.d.ts.map +1 -0
- package/dist/tools/lab-chem.js +1257 -0
- package/dist/tools/lab-chem.js.map +1 -0
- package/dist/tools/lab-core.d.ts +2 -0
- package/dist/tools/lab-core.d.ts.map +1 -0
- package/dist/tools/lab-core.js +2452 -0
- package/dist/tools/lab-core.js.map +1 -0
- package/dist/tools/lab-data.d.ts +2 -0
- package/dist/tools/lab-data.d.ts.map +1 -0
- package/dist/tools/lab-data.js +2464 -0
- package/dist/tools/lab-data.js.map +1 -0
- package/dist/tools/lab-earth.d.ts +2 -0
- package/dist/tools/lab-earth.d.ts.map +1 -0
- package/dist/tools/lab-earth.js +1124 -0
- package/dist/tools/lab-earth.js.map +1 -0
- package/dist/tools/lab-math.d.ts +2 -0
- package/dist/tools/lab-math.d.ts.map +1 -0
- package/dist/tools/lab-math.js +3021 -0
- package/dist/tools/lab-math.js.map +1 -0
- package/dist/tools/lab-physics.d.ts +2 -0
- package/dist/tools/lab-physics.d.ts.map +1 -0
- package/dist/tools/lab-physics.js +2423 -0
- package/dist/tools/lab-physics.js.map +1 -0
- package/dist/tools/research-notebook.d.ts +2 -0
- package/dist/tools/research-notebook.d.ts.map +1 -0
- package/dist/tools/research-notebook.js +1165 -0
- package/dist/tools/research-notebook.js.map +1 -0
- package/dist/tools/research-pipeline.d.ts +2 -0
- package/dist/tools/research-pipeline.d.ts.map +1 -0
- package/dist/tools/research-pipeline.js +1094 -0
- package/dist/tools/research-pipeline.js.map +1 -0
- package/dist/tools/science-graph.d.ts +2 -0
- package/dist/tools/science-graph.d.ts.map +1 -0
- package/dist/tools/science-graph.js +995 -0
- package/dist/tools/science-graph.js.map +1 -0
- package/package.json +2 -3
|
@@ -0,0 +1,2464 @@
|
|
|
1
|
+
// kbot Data Analysis & Statistics Tools — Regression, Bayesian, time series, PCA, and more.
|
|
2
|
+
// All computations are pure TypeScript — zero external dependencies.
|
|
3
|
+
// Implements numerical methods: normal equations, eigendecomposition, MLE, KS test, etc.
|
|
4
|
+
import { registerTool } from './index.js';
|
|
5
|
+
// ══════════════════════════════════════════════════════════════════════════════
|
|
6
|
+
// SHARED MATH UTILITIES
|
|
7
|
+
// ══════════════════════════════════════════════════════════════════════════════
|
|
8
|
+
/** Format a number with a fixed number of decimal places (default 4); non-finite values are stringified as-is. */
function fmt(n, d = 4) {
    return isFinite(n) ? n.toFixed(d) : String(n);
}
/** Arithmetic mean of the values; returns 0 for an empty array. */
function mean(arr) {
    if (arr.length === 0) {
        return 0;
    }
    let total = 0;
    for (const v of arr) {
        total += v;
    }
    return total / arr.length;
}
/** Sample variance (n − 1 denominator); returns 0 when fewer than two values are given. */
function variance(arr) {
    const n = arr.length;
    if (n < 2) {
        return 0;
    }
    const m = arr.reduce((acc, v) => acc + v, 0) / n;
    let squaredError = 0;
    for (const v of arr) {
        squaredError += (v - m) * (v - m);
    }
    return squaredError / (n - 1);
}
/** Sample standard deviation — the square root of the sample variance. */
function stddev(arr) {
    const v = variance(arr);
    return Math.sqrt(v);
}
/** Median of the values; the input array is not mutated (a sorted copy is used). */
function median(arr) {
    const sorted = Array.from(arr).sort((x, y) => x - y);
    const half = Math.floor(sorted.length / 2);
    if (sorted.length % 2 === 1) {
        return sorted[half];
    }
    return (sorted[half - 1] + sorted[half]) / 2;
}
/** Sum of all values; 0 for an empty array. */
function sum(arr) {
    let total = 0;
    for (const v of arr) {
        total += v;
    }
    return total;
}
/**
 * Standard normal CDF Φ(x), using the Abramowitz & Stegun rational
 * approximation 26.2.17 for erf (max absolute error ≈ 1.5e-7).
 * ±Infinity are handled explicitly to avoid NaN from exp(-z*z).
 */
function normalCDF(x) {
    if (x === Infinity) {
        return 1;
    }
    if (x === -Infinity) {
        return 0;
    }
    const sign = x < 0 ? -1 : 1;
    const z = Math.abs(x) / Math.SQRT2;
    // Horner evaluation of the degree-5 polynomial in t = 1/(1 + p*z).
    const t = 1.0 / (1.0 + 0.3275911 * z);
    const poly = ((((1.061405429 * t + -1.453152027) * t + 1.421413741) * t + -0.284496736) * t + 0.254829592) * t;
    const erfApprox = 1.0 - poly * Math.exp(-z * z);
    return 0.5 * (1.0 + sign * erfApprox);
}
/** Standard normal probability density φ(x) = e^(−x²/2) / √(2π). */
function normalPDF(x) {
    const normalization = Math.sqrt(2 * Math.PI);
    return Math.exp(-0.5 * x * x) / normalization;
}
/**
 * Inverse standard normal CDF (quantile function), via Acklam's rational
 * approximation: a central fit in r = (p − 0.5)² for 0.02425 ≤ p ≤ 0.97575,
 * and a tail fit in q = √(−2·ln p) outside that range (mirrored for the
 * upper tail). Returns ∓Infinity for p outside (0, 1) and exactly 0 at 0.5.
 */
function normalQuantile(p) {
    if (p <= 0) {
        return -Infinity;
    }
    if (p >= 1) {
        return Infinity;
    }
    if (p === 0.5) {
        return 0;
    }
    const a = [
        -3.969683028665376e1, 2.209460984245205e2,
        -2.759285104469687e2, 1.383577518672690e2,
        -3.066479806614716e1, 2.506628277459239e0,
    ];
    const b = [
        -5.447609879822406e1, 1.615858368580409e2,
        -1.556989798598866e2, 6.680131188771972e1,
        -1.328068155288572e1,
    ];
    const c = [
        -7.784894002430293e-3, -3.223964580411365e-1,
        -2.400758277161838e0, -2.549732539343734e0,
        4.374664141464968e0, 2.938163982698783e0,
    ];
    const d = [
        7.784695709041462e-3, 3.224671290700398e-1,
        2.445134137142996e0, 3.754408661907416e0,
    ];
    // Tail approximation shared by both tails (q = sqrt(-2 ln(tail mass))).
    const tailFit = (q) =>
        (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) /
        ((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1);
    const pLow = 0.02425;
    if (p < pLow) {
        // Lower tail.
        return tailFit(Math.sqrt(-2 * Math.log(p)));
    }
    if (p > 1 - pLow) {
        // Upper tail — mirror image of the lower-tail fit.
        return -tailFit(Math.sqrt(-2 * Math.log(1 - p)));
    }
    // Central region.
    const q = p - 0.5;
    const r = q * q;
    return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * q /
        (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1);
}
/**
 * Natural logarithm of the Gamma function, ln Γ(x), via the Lanczos
 * approximation with g = 7 and nine coefficients.
 * Returns Infinity for x ≤ 0 (this implementation does not use the
 * reflection formula for the negative axis).
 */
function logGamma(x) {
    if (x <= 0) {
        return Infinity;
    }
    const coef = [
        0.99999999999980993, 676.5203681218851, -1259.1392167224028,
        771.32342877765313, -176.61502916214059, 12.507343278686905,
        -0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7,
    ];
    let series = coef[0];
    for (let i = 1; i <= 8; i++) {
        series += coef[i] / (x + i - 1);
    }
    const t = x + 7 - 0.5;  // x + g − 0.5
    return 0.5 * Math.log(2 * Math.PI) + (x - 0.5) * Math.log(t) - t + Math.log(series);
}
/** Gamma function Γ(x), recovered by exponentiating ln Γ(x). */
function gamma(x) {
    const lg = logGamma(x);
    return Math.exp(lg);
}
/**
 * Regularized lower incomplete gamma function P(a, x).
 * For x < a + 1 the power series converges quickly and is used directly;
 * otherwise P is computed as the complement of the continued-fraction
 * Q(a, x). Edge cases: x ≤ 0 → 0, and a ≤ 0 (with x > 0) → 1.
 */
function lowerIncompleteGammaP(a, x) {
    if (x <= 0) {
        return 0;
    }
    if (a <= 0) {
        return 1;
    }
    if (x >= a + 1) {
        // Series converges slowly here — use the continued fraction for Q.
        return 1 - upperIncompleteGammaQ(a, x);
    }
    // Power series: P(a,x) = x^a e^{-x} / Γ(a) · Σ x^n / (a(a+1)…(a+n)).
    let term = 1 / a;
    let acc = term;
    for (let n = 1; n < 200; n++) {
        term *= x / (a + n);
        acc += term;
        if (Math.abs(term) < 1e-14 * Math.abs(acc)) {
            break;
        }
    }
    return acc * Math.exp(-x + a * Math.log(x) - logGamma(a));
}
/**
 * Regularized upper incomplete gamma function Q(a, x), evaluated with the
 * classical continued-fraction expansion using the modified Lentz
 * algorithm (tiny-value clamping at 1e-30 avoids division blow-ups).
 * Returns 1 for x ≤ 0.
 */
function upperIncompleteGammaQ(a, x) {
    if (x <= 0) {
        return 1;
    }
    const TINY = 1e-30;
    let frac = x + 1 - a;
    if (Math.abs(frac) < TINY) {
        frac = TINY;
    }
    let c = frac;
    let d = 0;
    for (let i = 1; i < 200; i++) {
        const numer = i * (a - i);
        const denom = x + 2 * i + 1 - a;
        d = denom + numer * d;
        if (Math.abs(d) < TINY) {
            d = TINY;
        }
        c = denom + numer / c;
        if (Math.abs(c) < TINY) {
            c = TINY;
        }
        d = 1 / d;
        const delta = c * d;
        frac *= delta;
        if (Math.abs(delta - 1) < 1e-14) {
            break;  // converged
        }
    }
    return Math.exp(-x + a * Math.log(x) - logGamma(a)) / frac;
}
/** Chi-square CDF with df degrees of freedom: P(df/2, x/2); 0 for x ≤ 0. */
function chiSquareCDF(x, df) {
    return x > 0 ? lowerIncompleteGammaP(df / 2, x / 2) : 0;
}
/**
 * Regularized incomplete beta function I_x(a, b), via the standard
 * continued-fraction expansion evaluated with the modified Lentz
 * algorithm. The symmetry I_x(a,b) = 1 − I_{1−x}(b,a) is applied when
 * x is past the crossover (a+1)/(a+b+2), keeping the fraction in its
 * fast-converging region. Clamped at [0, 1] for out-of-range x.
 */
function incompleteBeta(x, a, b) {
    if (x <= 0) {
        return 0;
    }
    if (x >= 1) {
        return 1;
    }
    if (x > (a + 1) / (a + b + 2)) {
        return 1 - incompleteBeta(1 - x, b, a);
    }
    const lnBeta = logGamma(a) + logGamma(b) - logGamma(a + b);
    const prefactor = Math.exp(a * Math.log(x) + b * Math.log(1 - x) - lnBeta) / a;
    const TINY = 1e-30;
    let frac = 1;
    let c = 1;
    let d = 0;
    for (let m = 0; m <= 200; m++) {
        // Continued-fraction coefficients d_m (m = 0 is the leading 1).
        let term;
        if (m === 0) {
            term = 1;
        }
        else if (m % 2 === 0) {
            const k = m / 2;
            term = (k * (b - k) * x) / ((a + 2 * k - 1) * (a + 2 * k));
        }
        else {
            const k = (m - 1) / 2;
            term = -((a + k) * (a + b + k) * x) / ((a + 2 * k) * (a + 2 * k + 1));
        }
        d = 1 + term * d;
        if (Math.abs(d) < TINY) {
            d = TINY;
        }
        d = 1 / d;
        c = 1 + term / c;
        if (Math.abs(c) < TINY) {
            c = TINY;
        }
        const delta = c * d;
        frac *= delta;
        if (Math.abs(delta - 1) < 1e-14) {
            break;  // converged
        }
    }
    return prefactor * frac;
}
/**
 * Student's t CDF with df degrees of freedom, expressed through the
 * regularized incomplete beta function; returns NaN for df ≤ 0.
 */
function tCDF(t, df) {
    if (df <= 0) {
        return NaN;
    }
    const x = df / (df + t * t);
    const halfTail = 0.5 * incompleteBeta(x, df / 2, 0.5);
    return t >= 0 ? 1 - halfTail : halfTail;
}
/** F-distribution CDF with (df1, df2) degrees of freedom; 0 for x ≤ 0. */
function fCDF(x, df1, df2) {
    if (x <= 0) {
        return 0;
    }
    const scaled = df1 * x;
    return incompleteBeta(scaled / (scaled + df2), df1 / 2, df2 / 2);
}
/** Parse a comma-separated string into an array of finite numbers, silently dropping anything non-numeric. */
function parseCSV(s) {
    const values = [];
    for (const piece of s.split(',')) {
        const v = parseFloat(piece.trim());
        if (isFinite(v)) {
            values.push(v);
        }
    }
    return values;
}
/**
 * Rank-transform the data (1-based ranks, aligned with the input order);
 * tied values all receive the average of the ranks they span.
 * The input array is not modified.
 */
function rank(data) {
    const order = data
        .map((value, idx) => ({ value, idx }))
        .sort((u, w) => u.value - w.value);
    const ranks = new Array(data.length);
    let start = 0;
    while (start < order.length) {
        // Extend [start, end) over the run of equal values.
        let end = start + 1;
        while (end < order.length && order[end].value === order[start].value) {
            end++;
        }
        // The 1-based ranks start+1 … end average to (start + end + 1) / 2.
        const shared = (start + end + 1) / 2;
        for (let k = start; k < end; k++) {
            ranks[order[k].idx] = shared;
        }
        start = end;
    }
    return ranks;
}
/** Create a rows × cols matrix (array of independent row arrays) with every entry set to `fill` (default 0). */
function matCreate(rows, cols, fill = 0) {
    const M = [];
    for (let r = 0; r < rows; r++) {
        M.push(new Array(cols).fill(fill));
    }
    return M;
}
/**
 * Transpose of matrix A (array of equal-length row arrays).
 * Returns a new matrix; A is not modified.
 * An empty matrix transposes to [] (the original dereferenced A[0] and
 * threw a TypeError on empty input).
 */
function matTranspose(A) {
    if (A.length === 0) {
        return [];
    }
    return A[0].map((_, j) => A.map(row => row[j]));
}
/**
 * Dense matrix product C = A · B for row-major arrays-of-rows
 * (A is m×k, B is k×n, result is m×n). Neither input is modified.
 * Empty inputs now yield [] (the original dereferenced A[0]/B[0] and
 * threw a TypeError). Each C[i][j] accumulates over k in ascending order,
 * matching the original floating-point summation order.
 */
function matMul(A, B) {
    if (A.length === 0 || B.length === 0) {
        return [];
    }
    const inner = A[0].length;
    const cols = B[0].length;
    return A.map(rowA => {
        const rowC = new Array(cols).fill(0);
        for (let k = 0; k < inner; k++) {
            const aik = rowA[k];
            for (let j = 0; j < cols; j++) {
                rowC[j] += aik * B[k][j];
            }
        }
        return rowC;
    });
}
/**
 * Solve the linear system A·x = b by Gaussian elimination with partial
 * pivoting. Works on an augmented copy [A | b], so A and b are left
 * untouched. Throws when a pivot magnitude falls below 1e-12
 * ('Matrix is singular or nearly singular').
 */
function matSolve(A, b) {
    const n = A.length;
    const M = A.map((row, i) => row.concat(b[i]));  // augmented copy [A | b]
    for (let k = 0; k < n; k++) {
        // Partial pivoting: bring the largest |entry| in column k up to row k.
        let best = k;
        for (let r = k + 1; r < n; r++) {
            if (Math.abs(M[r][k]) > Math.abs(M[best][k])) {
                best = r;
            }
        }
        const held = M[k];
        M[k] = M[best];
        M[best] = held;
        if (Math.abs(M[k][k]) < 1e-12) {
            throw new Error('Matrix is singular or nearly singular');
        }
        // Eliminate column k from all rows below the pivot.
        for (let r = k + 1; r < n; r++) {
            const ratio = M[r][k] / M[k][k];
            for (let col = k; col <= n; col++) {
                M[r][col] -= ratio * M[k][col];
            }
        }
    }
    // Back substitution on the upper-triangular system.
    const x = new Array(n).fill(0);
    for (let i = n - 1; i >= 0; i--) {
        let acc = M[i][n];
        for (let j = i + 1; j < n; j++) {
            acc -= M[i][j] * x[j];
        }
        x[i] = acc / M[i][i];
    }
    return x;
}
/**
 * Inverse of a square matrix via Gauss-Jordan elimination with partial
 * pivoting on an augmented copy [A | I]. The input matrix is not modified.
 * Throws 'Singular matrix' when a pivot magnitude falls below 1e-12.
 */
function matInverse(A) {
    const n = A.length;
    // Build [A | I] without touching A.
    const M = A.map((row, i) =>
        row.concat(Array.from({ length: n }, (_, j) => (j === i ? 1 : 0))));
    for (let k = 0; k < n; k++) {
        // Partial pivoting on column k.
        let best = k;
        for (let r = k + 1; r < n; r++) {
            if (Math.abs(M[r][k]) > Math.abs(M[best][k])) {
                best = r;
            }
        }
        const held = M[k];
        M[k] = M[best];
        M[best] = held;
        if (Math.abs(M[k][k]) < 1e-12) {
            throw new Error('Singular matrix');
        }
        // Normalize the pivot row, then clear column k everywhere else.
        const pivot = M[k][k];
        for (let j = 0; j < 2 * n; j++) {
            M[k][j] /= pivot;
        }
        for (let r = 0; r < n; r++) {
            if (r === k) {
                continue;
            }
            const ratio = M[r][k];
            for (let j = 0; j < 2 * n; j++) {
                M[r][j] -= ratio * M[k][j];
            }
        }
    }
    // The right half of the augmented matrix is now A⁻¹.
    return M.map(row => row.slice(n));
}
/**
 * Eigendecomposition of a real symmetric matrix by the classical Jacobi
 * rotation method: repeatedly annihilate the largest off-diagonal entry
 * until all are below 1e-12 (capped at 100·n² sweeps).
 *
 * Bug fix: the original computed t = Math.sign(theta) / (…), and
 * Math.sign(0) is 0 — so whenever S[p][p] === S[q][q] (theta = 0) the
 * rotation degenerated to the identity while S[p][q] was still forcibly
 * zeroed, silently corrupting the spectrum (e.g. [[0,1],[1,0]] produced
 * eigenvalues [0,0] instead of [−1,1]). For theta = 0 the correct choice
 * is t = 1, a 45° rotation.
 *
 * Returns { values, vectors }; values[j] pairs with column j of vectors.
 * The input matrix is not modified.
 */
function symmetricEigen(A) {
    const n = A.length;
    const S = A.map(row => row.slice());
    // V starts as the identity and accumulates the rotations.
    const V = Array.from({ length: n }, (_, i) =>
        Array.from({ length: n }, (_, j) => (i === j ? 1 : 0)));
    const maxIter = 100 * n * n;
    for (let iter = 0; iter < maxIter; iter++) {
        // Find the largest off-diagonal element S[p][q].
        let maxVal = 0;
        let p = 0;
        let q = 1;
        for (let i = 0; i < n; i++) {
            for (let j = i + 1; j < n; j++) {
                const m = Math.abs(S[i][j]);
                if (m > maxVal) {
                    maxVal = m;
                    p = i;
                    q = j;
                }
            }
        }
        if (maxVal < 1e-12) {
            break;  // numerically diagonal — converged
        }
        // Rotation angle chosen to zero S[p][q]; theta = 0 needs t = 1
        // (45° rotation), which Math.sign(0) = 0 would wrongly collapse.
        const theta = (S[q][q] - S[p][p]) / (2 * S[p][q]);
        const sgn = theta >= 0 ? 1 : -1;
        const t = sgn / (Math.abs(theta) + Math.sqrt(theta * theta + 1));
        const c = 1 / Math.sqrt(t * t + 1);
        const s = t * c;
        // Apply the rotation to S (symmetric update).
        const Spp = S[p][p];
        const Sqq = S[q][q];
        const Spq = S[p][q];
        S[p][p] = c * c * Spp - 2 * s * c * Spq + s * s * Sqq;
        S[q][q] = s * s * Spp + 2 * s * c * Spq + c * c * Sqq;
        S[p][q] = 0;
        S[q][p] = 0;
        for (let i = 0; i < n; i++) {
            if (i === p || i === q) {
                continue;
            }
            const Sip = S[i][p];
            const Siq = S[i][q];
            S[i][p] = c * Sip - s * Siq;
            S[p][i] = S[i][p];
            S[i][q] = s * Sip + c * Siq;
            S[q][i] = S[i][q];
        }
        // Accumulate the rotation into the eigenvector matrix.
        for (let i = 0; i < n; i++) {
            const Vip = V[i][p];
            const Viq = V[i][q];
            V[i][p] = c * Vip - s * Viq;
            V[i][q] = s * Vip + c * Viq;
        }
    }
    return { values: S.map((row, i) => row[i]), vectors: V };
}
// ══════════════════════════════════════════════════════════════════════════════
|
|
410
|
+
// TOOL IMPLEMENTATIONS
|
|
411
|
+
// ══════════════════════════════════════════════════════════════════════════════
|
|
412
|
+
export function registerLabDataTools() {
|
|
413
|
+
// ── 1. REGRESSION ANALYSIS ──
|
|
414
|
+
registerTool({
|
|
415
|
+
name: 'regression_analysis',
|
|
416
|
+
description: 'Perform regression analysis — linear, polynomial, logistic, or exponential. Implements least squares via normal equations. Returns coefficients, R², adjusted R², p-values, and residual analysis.',
|
|
417
|
+
parameters: {
|
|
418
|
+
x_data: { type: 'string', description: 'X values (comma-separated numbers)', required: true },
|
|
419
|
+
y_data: { type: 'string', description: 'Y values (comma-separated numbers)', required: true },
|
|
420
|
+
model_type: { type: 'string', description: 'Model type: linear, polynomial, logistic, or exponential', required: true },
|
|
421
|
+
degree: { type: 'number', description: 'Polynomial degree (default 2, only for polynomial model)' },
|
|
422
|
+
},
|
|
423
|
+
tier: 'free',
|
|
424
|
+
async execute(args) {
|
|
425
|
+
const x = parseCSV(String(args.x_data));
|
|
426
|
+
const y = parseCSV(String(args.y_data));
|
|
427
|
+
if (x.length !== y.length || x.length < 2) {
|
|
428
|
+
return '**Error**: x_data and y_data must have the same length (minimum 2 points).';
|
|
429
|
+
}
|
|
430
|
+
const modelType = String(args.model_type).toLowerCase();
|
|
431
|
+
const degree = Number(args.degree) || 2;
|
|
432
|
+
const n = x.length;
|
|
433
|
+
let coefficients;
|
|
434
|
+
let yPred;
|
|
435
|
+
let paramNames;
|
|
436
|
+
if (modelType === 'linear') {
|
|
437
|
+
// y = b0 + b1*x — normal equations: (X'X)^-1 X'y
|
|
438
|
+
const X = x.map(xi => [1, xi]);
|
|
439
|
+
const Xt = matTranspose(X);
|
|
440
|
+
const XtX = matMul(Xt, X);
|
|
441
|
+
const Xty = matMul(Xt, y.map(yi => [yi]));
|
|
442
|
+
coefficients = matSolve(XtX, Xty.map(r => r[0]));
|
|
443
|
+
yPred = x.map(xi => coefficients[0] + coefficients[1] * xi);
|
|
444
|
+
paramNames = ['intercept', 'slope'];
|
|
445
|
+
}
|
|
446
|
+
else if (modelType === 'polynomial') {
|
|
447
|
+
// y = b0 + b1*x + b2*x^2 + ... + bd*x^d
|
|
448
|
+
const deg = Math.min(degree, n - 1);
|
|
449
|
+
const X = x.map(xi => {
|
|
450
|
+
const row = [];
|
|
451
|
+
for (let d = 0; d <= deg; d++)
|
|
452
|
+
row.push(xi ** d);
|
|
453
|
+
return row;
|
|
454
|
+
});
|
|
455
|
+
const Xt = matTranspose(X);
|
|
456
|
+
const XtX = matMul(Xt, X);
|
|
457
|
+
const Xty = matMul(Xt, y.map(yi => [yi]));
|
|
458
|
+
coefficients = matSolve(XtX, Xty.map(r => r[0]));
|
|
459
|
+
yPred = x.map(xi => {
|
|
460
|
+
let val = 0;
|
|
461
|
+
for (let d = 0; d <= deg; d++)
|
|
462
|
+
val += coefficients[d] * (xi ** d);
|
|
463
|
+
return val;
|
|
464
|
+
});
|
|
465
|
+
paramNames = Array.from({ length: deg + 1 }, (_, i) => i === 0 ? 'intercept' : `x^${i}`);
|
|
466
|
+
}
|
|
467
|
+
else if (modelType === 'exponential') {
|
|
468
|
+
// y = a * e^(b*x) — linearize: ln(y) = ln(a) + b*x
|
|
469
|
+
const yLog = y.map(yi => Math.log(Math.max(yi, 1e-10)));
|
|
470
|
+
const X = x.map(xi => [1, xi]);
|
|
471
|
+
const Xt = matTranspose(X);
|
|
472
|
+
const XtX = matMul(Xt, X);
|
|
473
|
+
const Xty = matMul(Xt, yLog.map(yi => [yi]));
|
|
474
|
+
const linCoeffs = matSolve(XtX, Xty.map(r => r[0]));
|
|
475
|
+
coefficients = [Math.exp(linCoeffs[0]), linCoeffs[1]];
|
|
476
|
+
yPred = x.map(xi => coefficients[0] * Math.exp(coefficients[1] * xi));
|
|
477
|
+
paramNames = ['a (amplitude)', 'b (rate)'];
|
|
478
|
+
}
|
|
479
|
+
else if (modelType === 'logistic') {
|
|
480
|
+
// y = L / (1 + e^(-k*(x - x0))) — simplified: L=1, fit via iterative least squares
|
|
481
|
+
// Initialize with reasonable guesses
|
|
482
|
+
const L = Math.max(...y) * 1.1;
|
|
483
|
+
let k = 1;
|
|
484
|
+
let x0 = median(x);
|
|
485
|
+
// Gradient descent
|
|
486
|
+
const lr = 0.001;
|
|
487
|
+
for (let iter = 0; iter < 2000; iter++) {
|
|
488
|
+
let dL = 0, dk = 0, dx0 = 0;
|
|
489
|
+
for (let i = 0; i < n; i++) {
|
|
490
|
+
const exponent = -k * (x[i] - x0);
|
|
491
|
+
const expVal = Math.exp(Math.max(-500, Math.min(500, exponent)));
|
|
492
|
+
const pred = L / (1 + expVal);
|
|
493
|
+
const err = y[i] - pred;
|
|
494
|
+
const denom = (1 + expVal) ** 2;
|
|
495
|
+
dL += -err / (1 + expVal) * (-1);
|
|
496
|
+
dk += -err * L * (x[i] - x0) * expVal / denom * (-1);
|
|
497
|
+
dx0 += -err * L * (-k) * expVal / denom * (-1);
|
|
498
|
+
}
|
|
499
|
+
k -= lr * dk / n;
|
|
500
|
+
x0 -= lr * dx0 / n;
|
|
501
|
+
}
|
|
502
|
+
coefficients = [L, k, x0];
|
|
503
|
+
yPred = x.map(xi => {
|
|
504
|
+
const exponent = -k * (xi - x0);
|
|
505
|
+
return L / (1 + Math.exp(Math.max(-500, Math.min(500, exponent))));
|
|
506
|
+
});
|
|
507
|
+
paramNames = ['L (supremum)', 'k (steepness)', 'x0 (midpoint)'];
|
|
508
|
+
}
|
|
509
|
+
else {
|
|
510
|
+
return `**Error**: Unknown model_type "${modelType}". Use: linear, polynomial, logistic, exponential.`;
|
|
511
|
+
}
|
|
512
|
+
// R² and adjusted R²
|
|
513
|
+
const yMean = mean(y);
|
|
514
|
+
const ssTot = y.reduce((s, yi) => s + (yi - yMean) ** 2, 0);
|
|
515
|
+
const ssRes = y.reduce((s, yi, i) => s + (yi - yPred[i]) ** 2, 0);
|
|
516
|
+
const rSquared = ssTot > 0 ? 1 - ssRes / ssTot : 0;
|
|
517
|
+
const p = coefficients.length; // number of parameters
|
|
518
|
+
const adjRSquared = 1 - ((1 - rSquared) * (n - 1)) / (n - p - 1);
|
|
519
|
+
// Residuals
|
|
520
|
+
const residuals = y.map((yi, i) => yi - yPred[i]);
|
|
521
|
+
const mse = ssRes / (n - p);
|
|
522
|
+
const rmse = Math.sqrt(mse);
|
|
523
|
+
// Standard errors and t-statistics for linear/polynomial
|
|
524
|
+
let pValues = [];
|
|
525
|
+
if (modelType === 'linear' || modelType === 'polynomial') {
|
|
526
|
+
const deg = modelType === 'linear' ? 1 : Math.min(degree, n - 1);
|
|
527
|
+
const X = x.map(xi => {
|
|
528
|
+
const row = [];
|
|
529
|
+
for (let d = 0; d <= deg; d++)
|
|
530
|
+
row.push(xi ** d);
|
|
531
|
+
return row;
|
|
532
|
+
});
|
|
533
|
+
try {
|
|
534
|
+
const Xt = matTranspose(X);
|
|
535
|
+
const XtXInv = matInverse(matMul(Xt, X));
|
|
536
|
+
const se = coefficients.map((_, j) => Math.sqrt(Math.max(0, mse * XtXInv[j][j])));
|
|
537
|
+
const tStats = coefficients.map((c, j) => se[j] > 0 ? c / se[j] : 0);
|
|
538
|
+
pValues = tStats.map(t => {
|
|
539
|
+
const pVal = 2 * (1 - tCDF(Math.abs(t), n - p));
|
|
540
|
+
return pVal < 0.001 ? '<0.001' : fmt(pVal, 4);
|
|
541
|
+
});
|
|
542
|
+
}
|
|
543
|
+
catch {
|
|
544
|
+
pValues = coefficients.map(() => 'N/A');
|
|
545
|
+
}
|
|
546
|
+
}
|
|
547
|
+
else {
|
|
548
|
+
pValues = coefficients.map(() => 'N/A (nonlinear)');
|
|
549
|
+
}
|
|
550
|
+
// F-statistic
|
|
551
|
+
const fStat = ssTot > 0 && p > 1 ? ((ssTot - ssRes) / (p - 1)) / mse : 0;
|
|
552
|
+
const fPValue = fStat > 0 ? 1 - fCDF(fStat, p - 1, n - p) : 1;
|
|
553
|
+
// Residual analysis
|
|
554
|
+
const residMean = mean(residuals);
|
|
555
|
+
const residStd = stddev(residuals);
|
|
556
|
+
const sortedResid = [...residuals].sort((a, b) => a - b);
|
|
557
|
+
const durbin_watson = residuals.slice(1).reduce((s, r, i) => s + (r - residuals[i]) ** 2, 0) / ssRes;
|
|
558
|
+
// Equation string
|
|
559
|
+
let equation = '';
|
|
560
|
+
if (modelType === 'linear') {
|
|
561
|
+
equation = `y = ${fmt(coefficients[0])} + ${fmt(coefficients[1])} * x`;
|
|
562
|
+
}
|
|
563
|
+
else if (modelType === 'polynomial') {
|
|
564
|
+
equation = coefficients.map((c, i) => i === 0 ? fmt(c) : `${fmt(c)} * x^${i}`).join(' + ');
|
|
565
|
+
equation = `y = ${equation}`;
|
|
566
|
+
}
|
|
567
|
+
else if (modelType === 'exponential') {
|
|
568
|
+
equation = `y = ${fmt(coefficients[0])} * e^(${fmt(coefficients[1])} * x)`;
|
|
569
|
+
}
|
|
570
|
+
else if (modelType === 'logistic') {
|
|
571
|
+
equation = `y = ${fmt(coefficients[0])} / (1 + e^(-${fmt(coefficients[1])} * (x - ${fmt(coefficients[2])})))`;
|
|
572
|
+
}
|
|
573
|
+
let out = `## Regression Analysis — ${modelType.charAt(0).toUpperCase() + modelType.slice(1)}\n\n`;
|
|
574
|
+
out += `**Equation:** \`${equation}\`\n\n`;
|
|
575
|
+
out += `### Coefficients\n\n`;
|
|
576
|
+
out += `| Parameter | Value | p-value |\n|---|---|---|\n`;
|
|
577
|
+
coefficients.forEach((c, i) => {
|
|
578
|
+
out += `| ${paramNames[i]} | ${fmt(c, 6)} | ${pValues[i]} |\n`;
|
|
579
|
+
});
|
|
580
|
+
out += `\n### Model Fit\n\n`;
|
|
581
|
+
out += `| Metric | Value |\n|---|---|\n`;
|
|
582
|
+
out += `| R² | ${fmt(rSquared, 6)} |\n`;
|
|
583
|
+
out += `| Adjusted R² | ${fmt(adjRSquared, 6)} |\n`;
|
|
584
|
+
out += `| RMSE | ${fmt(rmse, 6)} |\n`;
|
|
585
|
+
out += `| F-statistic | ${fmt(fStat, 4)} (p = ${fPValue < 0.001 ? '<0.001' : fmt(fPValue, 4)}) |\n`;
|
|
586
|
+
out += `| Durbin-Watson | ${fmt(durbin_watson, 4)} |\n`;
|
|
587
|
+
out += `| N | ${n} |\n`;
|
|
588
|
+
out += `\n### Residual Summary\n\n`;
|
|
589
|
+
out += `| Statistic | Value |\n|---|---|\n`;
|
|
590
|
+
out += `| Mean | ${fmt(residMean, 6)} |\n`;
|
|
591
|
+
out += `| Std Dev | ${fmt(residStd, 6)} |\n`;
|
|
592
|
+
out += `| Min | ${fmt(sortedResid[0], 6)} |\n`;
|
|
593
|
+
out += `| Median | ${fmt(median(residuals), 6)} |\n`;
|
|
594
|
+
out += `| Max | ${fmt(sortedResid[sortedResid.length - 1], 6)} |\n`;
|
|
595
|
+
return out;
|
|
596
|
+
},
|
|
597
|
+
});
|
|
598
|
+
// ── 2. BAYESIAN INFERENCE ──
|
|
599
|
+
registerTool({
|
|
600
|
+
name: 'bayesian_inference',
|
|
601
|
+
description: 'Conjugate prior Bayesian analysis. Supports Beta-Binomial, Normal-Normal, Gamma-Poisson, and Dirichlet-Multinomial models. Returns posterior parameters, credible intervals, and Bayes factor.',
|
|
602
|
+
parameters: {
|
|
603
|
+
prior_type: { type: 'string', description: 'Prior distribution: beta-binomial, normal-normal, gamma-poisson, dirichlet-multinomial', required: true },
|
|
604
|
+
prior_params: { type: 'string', description: 'Prior parameters as JSON (e.g. {"alpha": 1, "beta": 1} for Beta)', required: true },
|
|
605
|
+
likelihood_type: { type: 'string', description: 'Likelihood type (binomial, normal, poisson, multinomial)', required: true },
|
|
606
|
+
observations: { type: 'string', description: 'Observed data as comma-separated values or JSON', required: true },
|
|
607
|
+
},
|
|
608
|
+
tier: 'free',
|
|
609
|
+
async execute(args) {
|
|
610
|
+
const priorType = String(args.prior_type).toLowerCase().replace(/[_\s]/g, '-');
|
|
611
|
+
let priorParams;
|
|
612
|
+
try {
|
|
613
|
+
priorParams = JSON.parse(String(args.prior_params));
|
|
614
|
+
}
|
|
615
|
+
catch {
|
|
616
|
+
return '**Error**: prior_params must be valid JSON.';
|
|
617
|
+
}
|
|
618
|
+
const obsStr = String(args.observations);
|
|
619
|
+
let out = `## Bayesian Inference — ${priorType}\n\n`;
|
|
620
|
+
if (priorType === 'beta-binomial') {
|
|
621
|
+
const alpha0 = Number(priorParams.alpha) || 1;
|
|
622
|
+
const beta0 = Number(priorParams.beta) || 1;
|
|
623
|
+
const obs = parseCSV(obsStr);
|
|
624
|
+
const successes = obs.filter(v => v === 1 || v > 0).length;
|
|
625
|
+
const failures = obs.length - successes;
|
|
626
|
+
// Posterior: Beta(alpha0 + successes, beta0 + failures)
|
|
627
|
+
const alphaPost = alpha0 + successes;
|
|
628
|
+
const betaPost = beta0 + failures;
|
|
629
|
+
// Posterior mean and variance
|
|
630
|
+
const postMean = alphaPost / (alphaPost + betaPost);
|
|
631
|
+
const postVar = (alphaPost * betaPost) / ((alphaPost + betaPost) ** 2 * (alphaPost + betaPost + 1));
|
|
632
|
+
// 95% credible interval via normal approximation (good for large alpha+beta)
|
|
633
|
+
const postStd = Math.sqrt(postVar);
|
|
634
|
+
const ci95Lower = Math.max(0, postMean - 1.96 * postStd);
|
|
635
|
+
const ci95Upper = Math.min(1, postMean + 1.96 * postStd);
|
|
636
|
+
// Bayes factor for H1: p > 0.5 vs H0: p <= 0.5
|
|
637
|
+
// BF = P(data | H1) / P(data | H0) using prior predictive
|
|
638
|
+
const logBF = logGamma(alphaPost + betaPost) - logGamma(alphaPost) - logGamma(betaPost) -
|
|
639
|
+
(logGamma(alpha0 + beta0) - logGamma(alpha0) - logGamma(beta0));
|
|
640
|
+
out += `### Prior\n`;
|
|
641
|
+
out += `Beta(alpha = ${alpha0}, beta = ${beta0})\n\n`;
|
|
642
|
+
out += `### Data\n`;
|
|
643
|
+
out += `- Observations: ${obs.length}\n`;
|
|
644
|
+
out += `- Successes: ${successes}\n`;
|
|
645
|
+
out += `- Failures: ${failures}\n\n`;
|
|
646
|
+
out += `### Posterior\n`;
|
|
647
|
+
out += `Beta(alpha = ${alphaPost}, beta = ${betaPost})\n\n`;
|
|
648
|
+
out += `| Statistic | Value |\n|---|---|\n`;
|
|
649
|
+
out += `| Posterior Mean | ${fmt(postMean, 6)} |\n`;
|
|
650
|
+
out += `| Posterior Variance | ${fmt(postVar, 6)} |\n`;
|
|
651
|
+
out += `| Posterior Mode | ${fmt(alphaPost > 1 && betaPost > 1 ? (alphaPost - 1) / (alphaPost + betaPost - 2) : postMean, 6)} |\n`;
|
|
652
|
+
out += `| 95% Credible Interval | [${fmt(ci95Lower, 4)}, ${fmt(ci95Upper, 4)}] |\n`;
|
|
653
|
+
out += `| Log Marginal Likelihood | ${fmt(logBF, 4)} |\n`;
|
|
654
|
+
}
|
|
655
|
+
else if (priorType === 'normal-normal') {
|
|
656
|
+
const mu0 = Number(priorParams.mu) || 0;
|
|
657
|
+
const sigma0 = Number(priorParams.sigma) || 1;
|
|
658
|
+
const tau0 = 1 / (sigma0 * sigma0); // prior precision
|
|
659
|
+
const obs = parseCSV(obsStr);
|
|
660
|
+
const n = obs.length;
|
|
661
|
+
const xBar = mean(obs);
|
|
662
|
+
const sigmaLikelihood = Number(priorParams.sigma_likelihood) || stddev(obs) || 1;
|
|
663
|
+
const tauLikelihood = 1 / (sigmaLikelihood * sigmaLikelihood);
|
|
664
|
+
// Posterior: Normal(muPost, 1/tauPost)
|
|
665
|
+
const tauPost = tau0 + n * tauLikelihood;
|
|
666
|
+
const muPost = (tau0 * mu0 + n * tauLikelihood * xBar) / tauPost;
|
|
667
|
+
const sigmaPost = 1 / Math.sqrt(tauPost);
|
|
668
|
+
const ci95Lower = muPost - 1.96 * sigmaPost;
|
|
669
|
+
const ci95Upper = muPost + 1.96 * sigmaPost;
|
|
670
|
+
out += `### Prior\n`;
|
|
671
|
+
out += `Normal(mu = ${mu0}, sigma = ${sigma0})\n\n`;
|
|
672
|
+
out += `### Data\n`;
|
|
673
|
+
out += `- N: ${n}\n`;
|
|
674
|
+
out += `- Sample Mean: ${fmt(xBar, 6)}\n`;
|
|
675
|
+
out += `- Known/Estimated Sigma: ${fmt(sigmaLikelihood, 6)}\n\n`;
|
|
676
|
+
out += `### Posterior\n`;
|
|
677
|
+
out += `Normal(mu = ${fmt(muPost, 6)}, sigma = ${fmt(sigmaPost, 6)})\n\n`;
|
|
678
|
+
out += `| Statistic | Value |\n|---|---|\n`;
|
|
679
|
+
out += `| Posterior Mean | ${fmt(muPost, 6)} |\n`;
|
|
680
|
+
out += `| Posterior Std Dev | ${fmt(sigmaPost, 6)} |\n`;
|
|
681
|
+
out += `| Posterior Precision | ${fmt(tauPost, 6)} |\n`;
|
|
682
|
+
out += `| 95% Credible Interval | [${fmt(ci95Lower, 4)}, ${fmt(ci95Upper, 4)}] |\n`;
|
|
683
|
+
out += `| Prior Weight | ${fmt(tau0 / tauPost * 100, 1)}% |\n`;
|
|
684
|
+
out += `| Data Weight | ${fmt(n * tauLikelihood / tauPost * 100, 1)}% |\n`;
|
|
685
|
+
}
|
|
686
|
+
else if (priorType === 'gamma-poisson') {
|
|
687
|
+
const alpha0 = Number(priorParams.alpha) || 1;
|
|
688
|
+
const beta0 = Number(priorParams.beta) || 1;
|
|
689
|
+
const obs = parseCSV(obsStr);
|
|
690
|
+
const n = obs.length;
|
|
691
|
+
const total = sum(obs);
|
|
692
|
+
// Posterior: Gamma(alpha0 + sum, beta0 + n)
|
|
693
|
+
const alphaPost = alpha0 + total;
|
|
694
|
+
const betaPost = beta0 + n;
|
|
695
|
+
const postMean = alphaPost / betaPost;
|
|
696
|
+
const postVar = alphaPost / (betaPost * betaPost);
|
|
697
|
+
// Credible interval via normal approximation
|
|
698
|
+
const postStd = Math.sqrt(postVar);
|
|
699
|
+
const ci95Lower = Math.max(0, postMean - 1.96 * postStd);
|
|
700
|
+
const ci95Upper = postMean + 1.96 * postStd;
|
|
701
|
+
out += `### Prior\n`;
|
|
702
|
+
out += `Gamma(alpha = ${alpha0}, beta = ${beta0})\n\n`;
|
|
703
|
+
out += `### Data\n`;
|
|
704
|
+
out += `- N: ${n}\n`;
|
|
705
|
+
out += `- Sum: ${total}\n`;
|
|
706
|
+
out += `- Sample Mean: ${fmt(total / n, 4)}\n\n`;
|
|
707
|
+
out += `### Posterior\n`;
|
|
708
|
+
out += `Gamma(alpha = ${fmt(alphaPost, 2)}, beta = ${fmt(betaPost, 2)})\n\n`;
|
|
709
|
+
out += `| Statistic | Value |\n|---|---|\n`;
|
|
710
|
+
out += `| Posterior Mean (rate) | ${fmt(postMean, 6)} |\n`;
|
|
711
|
+
out += `| Posterior Variance | ${fmt(postVar, 6)} |\n`;
|
|
712
|
+
out += `| Posterior Mode | ${fmt(alphaPost > 1 ? (alphaPost - 1) / betaPost : 0, 6)} |\n`;
|
|
713
|
+
out += `| 95% Credible Interval | [${fmt(ci95Lower, 4)}, ${fmt(ci95Upper, 4)}] |\n`;
|
|
714
|
+
}
|
|
715
|
+
else if (priorType === 'dirichlet-multinomial') {
|
|
716
|
+
let alphas;
|
|
717
|
+
if (Array.isArray(priorParams.alpha)) {
|
|
718
|
+
alphas = priorParams.alpha.map(Number);
|
|
719
|
+
}
|
|
720
|
+
else {
|
|
721
|
+
alphas = parseCSV(String(priorParams.alpha || '1,1,1'));
|
|
722
|
+
}
|
|
723
|
+
// Observations: counts for each category
|
|
724
|
+
const obs = parseCSV(obsStr);
|
|
725
|
+
if (obs.length !== alphas.length) {
|
|
726
|
+
return `**Error**: Number of observation categories (${obs.length}) must match number of prior alphas (${alphas.length}).`;
|
|
727
|
+
}
|
|
728
|
+
const alphasPost = alphas.map((a, i) => a + obs[i]);
|
|
729
|
+
const alphaSum = sum(alphasPost);
|
|
730
|
+
const postMeans = alphasPost.map(a => a / alphaSum);
|
|
731
|
+
out += `### Prior\n`;
|
|
732
|
+
out += `Dirichlet(${alphas.map(a => fmt(a, 1)).join(', ')})\n\n`;
|
|
733
|
+
out += `### Data (counts)\n`;
|
|
734
|
+
out += obs.map((o, i) => `- Category ${i + 1}: ${o}`).join('\n') + '\n\n';
|
|
735
|
+
out += `### Posterior\n`;
|
|
736
|
+
out += `Dirichlet(${alphasPost.map(a => fmt(a, 1)).join(', ')})\n\n`;
|
|
737
|
+
out += `| Category | Prior alpha | Posterior alpha | Posterior Mean |\n|---|---|---|---|\n`;
|
|
738
|
+
alphas.forEach((a, i) => {
|
|
739
|
+
out += `| ${i + 1} | ${fmt(a, 1)} | ${fmt(alphasPost[i], 1)} | ${fmt(postMeans[i], 6)} |\n`;
|
|
740
|
+
});
|
|
741
|
+
out += `\n| Total observations: ${sum(obs)} | Alpha sum: ${fmt(alphaSum, 1)} |\n`;
|
|
742
|
+
}
|
|
743
|
+
else {
|
|
744
|
+
return `**Error**: Unknown prior_type "${priorType}". Use: beta-binomial, normal-normal, gamma-poisson, dirichlet-multinomial.`;
|
|
745
|
+
}
|
|
746
|
+
return out;
|
|
747
|
+
},
|
|
748
|
+
});
|
|
749
|
+
// ── 3. TIME SERIES ANALYSIS ──
|
|
750
|
+
registerTool({
    name: 'time_series_analyze',
    description: 'Time series decomposition, moving averages, exponential smoothing (Holt-Winters), and simple ARIMA estimation. Returns trend, seasonal, and residual components plus forecasts.',
    parameters: {
        data: { type: 'string', description: 'Time series values (comma-separated numbers)', required: true },
        frequency: { type: 'number', description: 'Seasonal frequency (e.g. 12 for monthly, 4 for quarterly)', required: true },
        forecast_periods: { type: 'number', description: 'Number of periods to forecast (default 5)' },
        method: { type: 'string', description: 'Method: decomposition, arima, or exponential_smoothing', required: true },
    },
    tier: 'free',
    /**
     * Run the selected time-series analysis and return a Markdown report.
     *
     * Methods:
     * - `decomposition`: additive decomposition (Y = Trend + Seasonal + Residual)
     *   via a centered moving average, with a linear trend-extension forecast.
     * - `exponential_smoothing`: additive Holt-Winters with fixed smoothing
     *   constants (alpha=0.3, beta=0.1, gamma=0.1).
     * - `arima`: a rough ARIMA(1,1,1) estimate (OLS for phi, lag-1 residual
     *   autocorrelation for theta) — a heuristic, not a full MLE fit.
     *
     * @param {object} args - Tool arguments (see `parameters` above).
     * @returns {Promise<string>} Markdown report, or an `**Error**` string on bad input.
     */
    async execute(args) {
        const data = parseCSV(String(args.data));
        const freq = Number(args.frequency) || 4;
        const forecastN = Number(args.forecast_periods) || 5;
        // FIX: normalize runs of whitespace AND hyphens to underscores so
        // "exponential-smoothing" is accepted. The previous pattern /[_\s]/g
        // never matched "-" and its "_" member was a no-op (replaced "_" with "_").
        const method = String(args.method).toLowerCase().replace(/[\s-]+/g, '_');
        if (data.length < 4)
            return '**Error**: Need at least 4 data points.';
        let out = `## Time Series Analysis\n\n`;
        out += `- **Method:** ${method}\n`;
        out += `- **N:** ${data.length}\n`;
        out += `- **Frequency:** ${freq}\n`;
        out += `- **Forecast Periods:** ${forecastN}\n\n`;
        if (method === 'decomposition') {
            // Additive decomposition: Y = Trend + Seasonal + Residual
            // 1. Trend via centered moving average (null where the window
            //    would run off either end of the series).
            const trend = new Array(data.length).fill(null);
            const halfWin = Math.floor(freq / 2);
            for (let i = halfWin; i < data.length - halfWin; i++) {
                let s = 0;
                if (freq % 2 === 0) {
                    // Even frequency: a 2×MA — endpoints of the window get half weight
                    // so the average stays centered on observation i.
                    for (let j = i - halfWin; j <= i + halfWin; j++) {
                        const weight = (j === i - halfWin || j === i + halfWin) ? 0.5 : 1;
                        s += data[j] * weight;
                    }
                    trend[i] = s / freq;
                }
                else {
                    for (let j = i - halfWin; j <= i + halfWin; j++)
                        s += data[j];
                    trend[i] = s / freq;
                }
            }
            // 2. Detrended series (null where trend is unavailable)
            const detrended = data.map((v, i) => trend[i] !== null ? v - trend[i] : null);
            // 3. Seasonal component: average detrended values by position in cycle
            const seasonal = new Array(freq).fill(0);
            const seasonalCounts = new Array(freq).fill(0);
            detrended.forEach((v, i) => {
                if (v !== null) {
                    seasonal[i % freq] += v;
                    seasonalCounts[i % freq]++;
                }
            });
            for (let i = 0; i < freq; i++) {
                seasonal[i] = seasonalCounts[i] > 0 ? seasonal[i] / seasonalCounts[i] : 0;
            }
            // Center the seasonal component so the indices sum to ~0 (additive model)
            const seasonalMean = mean(seasonal);
            for (let i = 0; i < freq; i++)
                seasonal[i] -= seasonalMean;
            // 4. Residual (fall back to the overall mean where trend is missing)
            const residual = data.map((v, i) => {
                const t = trend[i] !== null ? trend[i] : mean(data);
                return v - t - seasonal[i % freq];
            });
            // Forecast: extend trend linearly (endpoint slope, not a regression fit)
            // and re-apply the seasonal index for each future period.
            const trendValues = trend.filter(t => t !== null);
            const trendSlope = trendValues.length >= 2
                ? (trendValues[trendValues.length - 1] - trendValues[0]) / (trendValues.length - 1)
                : 0;
            const lastTrend = trendValues[trendValues.length - 1] || mean(data);
            const forecasts = [];
            for (let i = 1; i <= forecastN; i++) {
                forecasts.push(lastTrend + trendSlope * i + seasonal[(data.length + i - 1) % freq]);
            }
            out += `### Seasonal Indices\n\n`;
            out += `| Period | Index |\n|---|---|\n`;
            seasonal.forEach((s, i) => { out += `| ${i + 1} | ${fmt(s, 4)} |\n`; });
            out += `\n### Decomposition Summary\n\n`;
            out += `| Component | Mean | Std Dev |\n|---|---|---|\n`;
            out += `| Trend | ${fmt(mean(trendValues), 4)} | ${fmt(stddev(trendValues), 4)} |\n`;
            out += `| Seasonal | ${fmt(mean(seasonal), 4)} | ${fmt(stddev(seasonal), 4)} |\n`;
            out += `| Residual | ${fmt(mean(residual), 4)} | ${fmt(stddev(residual), 4)} |\n`;
            out += `\n### Forecasts\n\n`;
            out += `| Period | Value |\n|---|---|\n`;
            forecasts.forEach((f, i) => { out += `| t+${i + 1} | ${fmt(f, 4)} |\n`; });
        }
        else if (method === 'exponential_smoothing') {
            // Holt-Winters additive method with fixed (not optimized) smoothing constants.
            const alpha = 0.3; // level smoothing
            const beta = 0.1; // trend smoothing
            const gammaParam = 0.1; // seasonal smoothing
            // Initialize level from the first cycle; trend from the difference
            // between the first two cycle means (0 if only one cycle is available).
            let level = mean(data.slice(0, Math.min(freq, data.length)));
            let trendVal = data.length > freq
                ? (mean(data.slice(freq, Math.min(2 * freq, data.length))) - mean(data.slice(0, freq))) / freq
                : 0;
            // Initialize seasonal indices from the first cycle's deviations from level
            const seasonals = new Array(freq).fill(0);
            if (data.length >= freq) {
                for (let i = 0; i < freq; i++)
                    seasonals[i] = data[i] - level;
            }
            // Fit: one-step-ahead predictions, then recursive state updates
            const fitted = [];
            const residuals = [];
            for (let i = 0; i < data.length; i++) {
                const si = i % freq;
                const predicted = level + trendVal + seasonals[si];
                fitted.push(predicted);
                residuals.push(data[i] - predicted);
                const newLevel = alpha * (data[i] - seasonals[si]) + (1 - alpha) * (level + trendVal);
                const newTrend = beta * (newLevel - level) + (1 - beta) * trendVal;
                seasonals[si] = gammaParam * (data[i] - newLevel) + (1 - gammaParam) * seasonals[si];
                level = newLevel;
                trendVal = newTrend;
            }
            // Forecast: level + h*trend + matching seasonal index
            const forecasts = [];
            for (let i = 1; i <= forecastN; i++) {
                forecasts.push(level + trendVal * i + seasonals[(data.length + i - 1) % freq]);
            }
            const mse = residuals.reduce((s, r) => s + r * r, 0) / residuals.length;
            const mae = residuals.reduce((s, r) => s + Math.abs(r), 0) / residuals.length;
            out += `### Holt-Winters Parameters\n\n`;
            out += `| Parameter | Value |\n|---|---|\n`;
            out += `| Alpha (level) | ${alpha} |\n`;
            out += `| Beta (trend) | ${beta} |\n`;
            out += `| Gamma (seasonal) | ${gammaParam} |\n`;
            out += `| Final Level | ${fmt(level, 4)} |\n`;
            out += `| Final Trend | ${fmt(trendVal, 4)} |\n`;
            out += `\n### Fit Quality\n\n`;
            out += `| Metric | Value |\n|---|---|\n`;
            out += `| MSE | ${fmt(mse, 4)} |\n`;
            out += `| RMSE | ${fmt(Math.sqrt(mse), 4)} |\n`;
            out += `| MAE | ${fmt(mae, 4)} |\n`;
            out += `\n### Seasonal Indices\n\n`;
            out += `| Period | Index |\n|---|---|\n`;
            seasonals.forEach((s, i) => { out += `| ${i + 1} | ${fmt(s, 4)} |\n`; });
            out += `\n### Forecasts\n\n`;
            out += `| Period | Value |\n|---|---|\n`;
            forecasts.forEach((f, i) => { out += `| t+${i + 1} | ${fmt(f, 4)} |\n`; });
        }
        else if (method === 'arima') {
            // Simple ARIMA(1,1,1) estimation — heuristic moment-style estimates,
            // not maximum likelihood.
            // Step 1: First difference (the "I" in ARIMA(1,1,1))
            const diff = [];
            for (let i = 1; i < data.length; i++)
                diff.push(data[i] - data[i - 1]);
            // Step 2: Estimate AR(1) coefficient via OLS on differenced series
            let sumXY = 0, sumXX = 0;
            for (let i = 1; i < diff.length; i++) {
                sumXY += diff[i - 1] * diff[i];
                sumXX += diff[i - 1] * diff[i - 1];
            }
            const phi = sumXX > 0 ? sumXY / sumXX : 0;
            // Step 3: Compute AR residuals
            const arResid = [diff[0]];
            for (let i = 1; i < diff.length; i++) {
                arResid.push(diff[i] - phi * diff[i - 1]);
            }
            // Step 4: Estimate MA(1) coefficient via lag-1 autocorrelation of residuals
            const arResidMean = mean(arResid);
            let rho1Num = 0, rho1Den = 0;
            for (let i = 0; i < arResid.length; i++) {
                rho1Den += (arResid[i] - arResidMean) ** 2;
                if (i > 0)
                    rho1Num += (arResid[i] - arResidMean) * (arResid[i - 1] - arResidMean);
            }
            const theta = rho1Den > 0 ? -(rho1Num / rho1Den) : 0;
            // Step 5: Compute final residuals (recursive MA filter)
            const finalResid = [arResid[0]];
            for (let i = 1; i < arResid.length; i++) {
                finalResid.push(arResid[i] + theta * finalResid[i - 1]);
            }
            const sigmaResid = stddev(finalResid);
            // Step 6: Forecast on the differenced scale, then integrate back
            // to levels; future shocks are assumed 0 after the first step.
            const lastDiff = diff[diff.length - 1];
            const lastResid = finalResid[finalResid.length - 1];
            const forecasts = [];
            let prevDiff = lastDiff;
            let prevResid = lastResid;
            let lastVal = data[data.length - 1];
            for (let i = 0; i < forecastN; i++) {
                const nextDiff = phi * prevDiff + theta * prevResid;
                lastVal += nextDiff;
                forecasts.push(lastVal);
                prevResid = 0; // future residuals assumed 0
                prevDiff = nextDiff;
            }
            // AIC = n * ln(RSS/n) + 2k
            const rss = finalResid.reduce((s, r) => s + r * r, 0);
            const nResid = finalResid.length;
            const aic = nResid * Math.log(rss / nResid) + 2 * 3; // 3 params: phi, theta, sigma
            out += `### ARIMA(1,1,1) Parameter Estimates\n\n`;
            out += `| Parameter | Value |\n|---|---|\n`;
            out += `| AR(1) phi | ${fmt(phi, 6)} |\n`;
            out += `| MA(1) theta | ${fmt(theta, 6)} |\n`;
            out += `| Residual Sigma | ${fmt(sigmaResid, 6)} |\n`;
            out += `| AIC | ${fmt(aic, 4)} |\n`;
            out += `\n### Differenced Series Summary\n\n`;
            out += `| Statistic | Value |\n|---|---|\n`;
            out += `| Mean | ${fmt(mean(diff), 4)} |\n`;
            out += `| Std Dev | ${fmt(stddev(diff), 4)} |\n`;
            out += `| Autocorrelation(1) | ${fmt(rho1Den > 0 ? rho1Num / rho1Den : 0, 4)} |\n`;
            out += `\n### Forecasts\n\n`;
            out += `| Period | Value | 95% CI |\n|---|---|---|\n`;
            forecasts.forEach((f, i) => {
                // CI widens with sqrt(horizon) — a random-walk-style approximation
                const ciWidth = 1.96 * sigmaResid * Math.sqrt(i + 1);
                out += `| t+${i + 1} | ${fmt(f, 4)} | [${fmt(f - ciWidth, 4)}, ${fmt(f + ciWidth, 4)}] |\n`;
            });
        }
        else {
            return `**Error**: Unknown method "${method}". Use: decomposition, arima, exponential_smoothing.`;
        }
        return out;
    },
});
|
|
970
|
+
// ── 4. DIMENSIONALITY REDUCTION (PCA) ──
|
|
971
|
+
registerTool({
    name: 'dimensionality_reduce',
    description: 'Principal Component Analysis (PCA) via eigendecomposition of the covariance matrix. Returns principal components, explained variance ratios, and loadings.',
    parameters: {
        data: { type: 'string', description: 'Data as JSON array of arrays (each inner array is a sample)', required: true },
        method: { type: 'string', description: 'Method: pca (currently the only supported method)', required: true },
        n_components: { type: 'number', description: 'Number of principal components to return (default 2)' },
    },
    tier: 'free',
    /**
     * Run PCA on an n×p sample matrix and return a Markdown report with
     * explained variance, loadings, and the first 10 projected score rows.
     *
     * @param {object} args - Tool arguments (see `parameters` above).
     * @returns {Promise<string>} Markdown report, or an `**Error**` string on bad input.
     */
    async execute(args) {
        let dataMatrix;
        try {
            dataMatrix = JSON.parse(String(args.data));
        }
        catch {
            return '**Error**: data must be a valid JSON array of arrays.';
        }
        if (!Array.isArray(dataMatrix) || dataMatrix.length < 2 || !Array.isArray(dataMatrix[0])) {
            return '**Error**: data must be a 2D array with at least 2 rows.';
        }
        const methodName = String(args.method).toLowerCase();
        if (methodName !== 'pca') {
            return `**Error**: Unknown method "${methodName}". Currently only "pca" is supported.`;
        }
        const n = dataMatrix.length;
        const p = dataMatrix[0].length;
        // FIX: reject empty or ragged rows up front. Previously a short row
        // made row[j] undefined and silently propagated NaN through the
        // column means, covariance matrix, and eigendecomposition.
        if (p === 0 || !dataMatrix.every(row => Array.isArray(row) && row.length === p)) {
            return '**Error**: all rows must be non-empty arrays of the same length.';
        }
        const nComp = Math.min(Number(args.n_components) || 2, p);
        // 1. Center the data (subtract column means)
        const colMeans = Array.from({ length: p }, (_, j) => mean(dataMatrix.map(row => row[j])));
        const centered = dataMatrix.map(row => row.map((v, j) => v - colMeans[j]));
        // 2. Compute covariance matrix (1/(n-1) * X'X), exploiting symmetry
        const covMatrix = matCreate(p, p);
        for (let i = 0; i < p; i++) {
            for (let j = i; j < p; j++) {
                let s = 0;
                for (let k = 0; k < n; k++)
                    s += centered[k][i] * centered[k][j];
                covMatrix[i][j] = s / (n - 1);
                covMatrix[j][i] = covMatrix[i][j];
            }
        }
        // 3. Eigendecomposition of the symmetric covariance matrix
        const eigen = symmetricEigen(covMatrix);
        // 4. Sort eigenvalues descending (largest variance first)
        const indices = eigen.values.map((v, i) => ({ val: v, idx: i }))
            .sort((a, b) => b.val - a.val);
        // Negative eigenvalues are numerical noise — exclude/clamp them
        const totalVariance = sum(eigen.values.filter(v => v > 0));
        const explainedRatios = indices.map(({ val }) => Math.max(0, val) / (totalVariance || 1));
        const cumulativeRatios = [];
        let cumSum = 0;
        for (const r of explainedRatios) {
            cumSum += r;
            cumulativeRatios.push(cumSum);
        }
        // 5. Loadings: eigenvectors (columns of V) corresponding to top eigenvalues
        const loadings = [];
        for (let c = 0; c < nComp; c++) {
            const colIdx = indices[c].idx;
            loadings.push(Array.from({ length: p }, (_, row) => eigen.vectors[row][colIdx]));
        }
        // 6. Project (centered) data onto principal components → score matrix
        const projected = centered.map(row => {
            const scores = [];
            for (let c = 0; c < nComp; c++) {
                let score = 0;
                for (let j = 0; j < p; j++)
                    score += row[j] * loadings[c][j];
                scores.push(score);
            }
            return scores;
        });
        let out = `## PCA — Principal Component Analysis\n\n`;
        out += `- **Samples:** ${n}\n`;
        out += `- **Features:** ${p}\n`;
        out += `- **Components:** ${nComp}\n\n`;
        out += `### Explained Variance\n\n`;
        out += `| PC | Eigenvalue | Explained % | Cumulative % |\n|---|---|---|---|\n`;
        for (let i = 0; i < Math.min(p, nComp + 2); i++) {
            // "*" marks the components actually returned
            const marker = i < nComp ? ' *' : '';
            out += `| PC${i + 1}${marker} | ${fmt(Math.max(0, indices[i].val), 6)} | ${fmt(explainedRatios[i] * 100, 2)}% | ${fmt(cumulativeRatios[i] * 100, 2)}% |\n`;
        }
        out += `\n### Loadings (top ${nComp} components)\n\n`;
        out += `| Feature |`;
        for (let c = 0; c < nComp; c++)
            out += ` PC${c + 1} |`;
        out += `\n|---|`;
        for (let c = 0; c < nComp; c++)
            out += `---|`;
        out += `\n`;
        for (let j = 0; j < p; j++) {
            out += `| Feature ${j + 1} |`;
            for (let c = 0; c < nComp; c++)
                out += ` ${fmt(loadings[c][j], 4)} |`;
            out += `\n`;
        }
        out += `\n### Projected Scores (first ${Math.min(10, n)} samples)\n\n`;
        out += `| Sample |`;
        for (let c = 0; c < nComp; c++)
            out += ` PC${c + 1} |`;
        out += `\n|---|`;
        for (let c = 0; c < nComp; c++)
            out += `---|`;
        out += `\n`;
        for (let i = 0; i < Math.min(10, n); i++) {
            out += `| ${i + 1} |`;
            for (let c = 0; c < nComp; c++)
                out += ` ${fmt(projected[i][c], 4)} |`;
            out += `\n`;
        }
        if (n > 10)
            out += `\n*...${n - 10} more samples omitted.*\n`;
        return out;
    },
});
|
|
1085
|
+
// ── 5. DISTRIBUTION FIT ──
|
|
1086
|
+
registerTool({
    name: 'distribution_fit',
    description: 'Fit data to distributions (normal, poisson, exponential, gamma, weibull, lognormal) using MLE. Kolmogorov-Smirnov goodness-of-fit test. AIC/BIC comparison.',
    parameters: {
        data: { type: 'string', description: 'Data values (comma-separated numbers)', required: true },
        candidate_distributions: { type: 'string', description: 'Distributions to test (comma-separated): normal, poisson, exponential, gamma, weibull, lognormal', required: true },
    },
    tier: 'free',
    /**
     * Fit each candidate distribution, score it with log-likelihood / AIC /
     * BIC and a Kolmogorov-Smirnov statistic, and return a Markdown comparison
     * table sorted by AIC (lowest = best). Candidates that cannot be fit to
     * the data (e.g. no positive values for exponential/gamma/weibull/
     * lognormal, or zero-variance data for normal/lognormal) are skipped.
     *
     * @param {object} args - Tool arguments (see `parameters` above).
     * @returns {Promise<string>} Markdown report, or an `**Error**` string on bad input.
     */
    async execute(args) {
        const data = parseCSV(String(args.data));
        if (data.length < 3)
            return '**Error**: Need at least 3 data points.';
        const candidates = String(args.candidate_distributions).split(',').map(s => s.trim().toLowerCase());
        const n = data.length;
        const sorted = [...data].sort((a, b) => a - b);
        // Empirical CDF values for KS test
        const empiricalCDF = sorted.map((_, i) => (i + 1) / n);
        const results = [];
        for (const dist of candidates) {
            let params = {};
            let cdfFn;
            let logLikFn;
            let nParams = 0;
            if (dist === 'normal') {
                const mu = mean(data);
                const sigma = stddev(data);
                // FIX: constant data (sigma == 0) previously produced NaN/Infinity
                // in the CDF and log-likelihood; skip the fit instead.
                if (!(sigma > 0))
                    continue;
                params = { mu, sigma };
                nParams = 2;
                cdfFn = (x) => normalCDF((x - mu) / sigma);
                logLikFn = (x) => -0.5 * Math.log(2 * Math.PI) - Math.log(sigma) - 0.5 * ((x - mu) / sigma) ** 2;
            }
            else if (dist === 'poisson') {
                const lambda = mean(data);
                params = { lambda };
                nParams = 1;
                // Poisson CDF via summation (log-space terms to avoid overflow)
                cdfFn = (x) => {
                    const k = Math.floor(x);
                    if (k < 0)
                        return 0;
                    let p = 0;
                    for (let i = 0; i <= k; i++) {
                        p += Math.exp(-lambda + i * Math.log(lambda) - logGamma(i + 1));
                    }
                    return Math.min(1, p);
                };
                logLikFn = (x) => {
                    const k = Math.round(x);
                    return -lambda + k * Math.log(lambda) - logGamma(k + 1);
                };
            }
            else if (dist === 'exponential') {
                // FIX: guard against no positive observations — previously
                // 1/mean([]) produced NaN parameters. Now consistent with the
                // weibull/lognormal branches, which already skip via `continue`.
                const positive = data.filter(x => x > 0);
                if (positive.length === 0)
                    continue;
                const lambda = 1 / mean(positive);
                params = { lambda };
                nParams = 1;
                cdfFn = (x) => x >= 0 ? 1 - Math.exp(-lambda * x) : 0;
                logLikFn = (x) => x >= 0 ? Math.log(lambda) - lambda * x : -Infinity;
            }
            else if (dist === 'gamma') {
                // Estimation via method of moments (shape = m²/var, rate = m/var).
                // FIX: filter once and guard (was filtered twice, and the variance
                // binding `v` was shadowed by its own filter-callback parameter).
                const positive = data.filter(x => x > 0);
                if (positive.length < 2)
                    continue;
                const m = mean(positive);
                const s2 = variance(positive);
                const shape = s2 > 0 ? (m * m) / s2 : 1;
                const rate = s2 > 0 ? m / s2 : 1;
                params = { shape, rate };
                nParams = 2;
                cdfFn = (x) => x > 0 ? lowerIncompleteGammaP(shape, rate * x) : 0;
                logLikFn = (x) => {
                    if (x <= 0)
                        return -Infinity;
                    return (shape - 1) * Math.log(x) + shape * Math.log(rate) - rate * x - logGamma(shape);
                };
            }
            else if (dist === 'weibull') {
                // MLE via Newton-Raphson for the shape, then closed-form scale
                const positiveData = data.filter(v => v > 0);
                if (positiveData.length < 2)
                    continue;
                const logData = positiveData.map(v => Math.log(v));
                const meanLog = mean(logData);
                // Newton-Raphson for shape parameter k (numeric derivative)
                let k = 1.0;
                for (let iter = 0; iter < 50; iter++) {
                    const xk = positiveData.map(x => x ** k);
                    const xkLogX = positiveData.map((x, i) => (x ** k) * logData[i]);
                    const sumXk = sum(xk);
                    const sumXkLogX = sum(xkLogX);
                    const sumLogX = sum(logData);
                    const f = sumXkLogX / sumXk - 1 / k - sumLogX / positiveData.length;
                    // Approximate derivative via forward difference
                    const h = 0.001;
                    const xkh = positiveData.map(x => x ** (k + h));
                    const xkhLogX = positiveData.map((x, i) => (x ** (k + h)) * logData[i]);
                    const fh = sum(xkhLogX) / sum(xkh) - 1 / (k + h) - sumLogX / positiveData.length;
                    const df = (fh - f) / h;
                    if (Math.abs(df) < 1e-20)
                        break;
                    const step = f / df;
                    k -= step;
                    if (k <= 0.01)
                        k = 0.01; // clamp to keep the shape positive
                    if (Math.abs(step) < 1e-10)
                        break;
                }
                // Scale from the fitted shape (reuse positiveData — the original
                // redundantly re-filtered `data` for the same subset).
                const lambda_w = (sum(positiveData.map(x => x ** k)) / positiveData.length) ** (1 / k);
                params = { shape: k, scale: lambda_w };
                nParams = 2;
                cdfFn = (x) => x > 0 ? 1 - Math.exp(-((x / lambda_w) ** k)) : 0;
                logLikFn = (x) => {
                    if (x <= 0)
                        return -Infinity;
                    return Math.log(k) - k * Math.log(lambda_w) + (k - 1) * Math.log(x) - (x / lambda_w) ** k;
                };
            }
            else if (dist === 'lognormal') {
                const logData = data.filter(v => v > 0).map(v => Math.log(v));
                if (logData.length < 2)
                    continue;
                const mu = mean(logData);
                const sigma = stddev(logData);
                // FIX: identical log-values give sigma == 0 → division by zero; skip.
                if (!(sigma > 0))
                    continue;
                params = { mu, sigma };
                nParams = 2;
                cdfFn = (x) => x > 0 ? normalCDF((Math.log(x) - mu) / sigma) : 0;
                logLikFn = (x) => {
                    if (x <= 0)
                        return -Infinity;
                    return -Math.log(x) - 0.5 * Math.log(2 * Math.PI) - Math.log(sigma) - 0.5 * ((Math.log(x) - mu) / sigma) ** 2;
                };
            }
            else {
                continue; // unrecognized candidate name — skip silently
            }
            // Log-likelihood over all observations
            const logLik = data.reduce((s, x) => s + logLikFn(x), 0);
            // AIC and BIC (lower is better)
            const aic = -2 * logLik + 2 * nParams;
            const bic = -2 * logLik + nParams * Math.log(n);
            // KS test: max |F_empirical(x) - F_theoretical(x)|, checking both
            // sides of each empirical CDF step
            let ksMax = 0;
            for (let i = 0; i < sorted.length; i++) {
                const theorCDF = cdfFn(sorted[i]);
                const diff1 = Math.abs(empiricalCDF[i] - theorCDF);
                const diff2 = Math.abs((i > 0 ? empiricalCDF[i - 1] : 0) - theorCDF);
                ksMax = Math.max(ksMax, diff1, diff2);
            }
            // KS p-value: one-term Kolmogorov-distribution approximation
            // (large-sample; clamped to [0, 1] below)
            const sqrtN = Math.sqrt(n);
            const z = (sqrtN + 0.12 + 0.11 / sqrtN) * ksMax;
            const ksPValue = 2 * Math.exp(-2 * z * z);
            results.push({
                name: dist,
                params,
                logLik,
                aic,
                bic,
                ksStatistic: ksMax,
                ksPValue: Math.min(1, Math.max(0, ksPValue)),
                nParams,
            });
        }
        if (results.length === 0)
            return '**Error**: No valid distributions to test.';
        // Sort by AIC (best model first)
        results.sort((a, b) => a.aic - b.aic);
        let out = `## Distribution Fitting Results\n\n`;
        out += `**N = ${n}** | Data range: [${fmt(sorted[0], 4)}, ${fmt(sorted[sorted.length - 1], 4)}] | Mean: ${fmt(mean(data), 4)} | SD: ${fmt(stddev(data), 4)}\n\n`;
        out += `### Model Comparison (sorted by AIC)\n\n`;
        out += `| Distribution | AIC | BIC | Log-Lik | KS Stat | KS p-value | Fit |\n|---|---|---|---|---|---|---|\n`;
        results.forEach((r, i) => {
            const fit = r.ksPValue > 0.05 ? 'Good' : r.ksPValue > 0.01 ? 'Marginal' : 'Poor';
            const best = i === 0 ? ' **BEST**' : '';
            out += `| ${r.name}${best} | ${fmt(r.aic, 2)} | ${fmt(r.bic, 2)} | ${fmt(r.logLik, 2)} | ${fmt(r.ksStatistic, 4)} | ${r.ksPValue < 0.001 ? '<0.001' : fmt(r.ksPValue, 4)} | ${fit} |\n`;
        });
        out += `\n### Parameter Estimates\n\n`;
        results.forEach(r => {
            out += `**${r.name}**: `;
            out += Object.entries(r.params).map(([k, v]) => `${k} = ${fmt(v, 6)}`).join(', ');
            out += `\n`;
        });
        return out;
    },
});
|
|
1268
|
+
// ── 6. CORRELATION MATRIX ──
// Pairwise correlation matrix (Pearson / Spearman / Kendall) with per-cell
// significance markers and a "strongest correlations" summary table.
registerTool({
    name: 'correlation_matrix',
    description: 'Compute Pearson, Spearman, or Kendall correlation matrix. Output as formatted table with significance markers (*** p<0.001, ** p<0.01, * p<0.05).',
    parameters: {
        data: { type: 'string', description: 'Data as JSON array of arrays (each inner array is a variable\'s values)', required: true },
        method: { type: 'string', description: 'Correlation method: pearson, spearman, or kendall', required: true },
        variable_names: { type: 'string', description: 'Variable names (comma-separated, optional)' },
    },
    tier: 'free',
    async execute(args) {
        let dataMatrix;
        try {
            dataMatrix = JSON.parse(String(args.data));
        }
        catch {
            return '**Error**: data must be a valid JSON array of arrays.';
        }
        if (!Array.isArray(dataMatrix) || dataMatrix.length < 2) {
            return '**Error**: Need at least 2 variables (arrays).';
        }
        const methodName = String(args.method).toLowerCase();
        // Fix: an unknown method used to fall back silently to Pearson while the
        // report header displayed the requested (wrong) method name.
        if (methodName !== 'pearson' && methodName !== 'spearman' && methodName !== 'kendall') {
            return `**Error**: Unknown method "${methodName}". Use: pearson, spearman, or kendall.`;
        }
        const p = dataMatrix.length; // number of variables
        const n = dataMatrix[0].length; // observations per variable
        // Fix: ragged or non-numeric input previously produced NaN cells, and
        // n < 3 breaks the t-based p-value (df = n - 2) and the Kendall SE.
        if (!dataMatrix.every(v => Array.isArray(v) && v.length === n && v.every(x => Number.isFinite(x)))) {
            return '**Error**: All variables must be numeric arrays of equal length.';
        }
        if (n < 3) {
            return '**Error**: Need at least 3 observations per variable.';
        }
        const names = args.variable_names
            ? String(args.variable_names).split(',').map(s => s.trim())
            : dataMatrix.map((_, i) => `V${i + 1}`);
        // Fix: a name-count mismatch silently corrupted the markdown table layout.
        if (names.length !== p) {
            return `**Error**: variable_names count (${names.length}) must match number of variables (${p}).`;
        }
        // Pearson product-moment correlation.
        function pearsonCorr(x, y) {
            const mx = mean(x), my = mean(y);
            let num = 0, dx2 = 0, dy2 = 0;
            for (let i = 0; i < x.length; i++) {
                const dx = x[i] - mx, dy = y[i] - my;
                num += dx * dy;
                dx2 += dx * dx;
                dy2 += dy * dy;
            }
            const denom = Math.sqrt(dx2 * dy2);
            // A zero-variance variable yields r = 0 rather than NaN.
            return denom > 0 ? num / denom : 0;
        }
        // Spearman = Pearson on the ranked data.
        function spearmanCorr(x, y) {
            return pearsonCorr(rank(x), rank(y));
        }
        // Kendall's tau-a: (concordant - discordant) / total pairs.
        function kendallCorr(x, y) {
            let concordant = 0, discordant = 0;
            for (let i = 0; i < x.length; i++) {
                for (let j = i + 1; j < x.length; j++) {
                    const dx = x[i] - x[j];
                    const dy = y[i] - y[j];
                    if (dx * dy > 0)
                        concordant++;
                    else if (dx * dy < 0)
                        discordant++;
                }
            }
            const total = concordant + discordant;
            return total > 0 ? (concordant - discordant) / total : 0;
        }
        const corrFn = methodName === 'spearman' ? spearmanCorr
            : methodName === 'kendall' ? kendallCorr
                : pearsonCorr;
        // Build the symmetric correlation and p-value matrices.
        const corrMatrix = matCreate(p, p);
        const pValueMatrix = matCreate(p, p);
        for (let i = 0; i < p; i++) {
            corrMatrix[i][i] = 1.0;
            pValueMatrix[i][i] = 0;
            for (let j = i + 1; j < p; j++) {
                const r = corrFn(dataMatrix[i], dataMatrix[j]);
                corrMatrix[i][j] = r;
                corrMatrix[j][i] = r;
                let pVal;
                if (methodName === 'kendall') {
                    // Normal approximation for Kendall's tau under H0:
                    // Var(tau) = 2(2n+5) / (9n(n-1)).
                    const se = Math.sqrt((2 * (2 * n + 5)) / (9 * n * (n - 1)));
                    const z = Math.abs(r) / se;
                    pVal = 2 * (1 - normalCDF(z));
                }
                else {
                    // t-test for Pearson/Spearman: t = r * sqrt((n-2)/(1-r^2)), df = n-2.
                    if (Math.abs(r) >= 1) {
                        pVal = 0; // perfect correlation; avoids division by zero below
                    }
                    else {
                        const t = r * Math.sqrt((n - 2) / (1 - r * r));
                        pVal = 2 * (1 - tCDF(Math.abs(t), n - 2));
                    }
                }
                pValueMatrix[i][j] = pVal;
                pValueMatrix[j][i] = pVal;
            }
        }
        // Conventional star markers for a p-value.
        function sigMarker(pVal) {
            if (pVal < 0.001)
                return '***';
            if (pVal < 0.01)
                return '**';
            if (pVal < 0.05)
                return '*';
            return '';
        }
        let out = `## Correlation Matrix — ${methodName.charAt(0).toUpperCase() + methodName.slice(1)}\n\n`;
        out += `**N = ${n}** observations\n\n`;
        // Header row
        out += `| |`;
        names.forEach(name => { out += ` ${name} |`; });
        out += `\n|---|`;
        names.forEach(() => { out += `---|`; });
        out += `\n`;
        // Body rows
        for (let i = 0; i < p; i++) {
            out += `| **${names[i]}** |`;
            for (let j = 0; j < p; j++) {
                if (i === j) {
                    out += ` 1.0000 |`;
                }
                else {
                    const marker = sigMarker(pValueMatrix[i][j]);
                    out += ` ${fmt(corrMatrix[i][j], 4)}${marker} |`;
                }
            }
            out += `\n`;
        }
        out += `\nSignificance: \\*\\*\\* p<0.001, \\*\\* p<0.01, \\* p<0.05\n`;
        // Summarize the upper triangle sorted by |r|.
        const pairs = [];
        for (let i = 0; i < p; i++) {
            for (let j = i + 1; j < p; j++) {
                pairs.push({ i, j, r: corrMatrix[i][j], p: pValueMatrix[i][j] });
            }
        }
        pairs.sort((a, b) => Math.abs(b.r) - Math.abs(a.r));
        if (pairs.length > 0) {
            out += `\n### Strongest Correlations\n\n`;
            out += `| Pair | r | p-value |\n|---|---|---|\n`;
            pairs.slice(0, 5).forEach(pair => {
                out += `| ${names[pair.i]} - ${names[pair.j]} | ${fmt(pair.r, 4)} | ${pair.p < 0.001 ? '<0.001' : fmt(pair.p, 4)} |\n`;
            });
        }
        return out;
    },
});
|
|
1411
|
+
// ── 7. POWER ANALYSIS ──
// A-priori / post-hoc power calculator using normal approximations for the
// t-test, one-way ANOVA, chi-square, and two-proportion tests.
registerTool({
    name: 'power_analysis',
    description: 'Calculate statistical power, required sample size, or minimum detectable effect size for t-test, ANOVA, chi-square, or proportion test.',
    parameters: {
        test_type: { type: 'string', description: 'Test type: t_test, anova, chi_square, proportion', required: true },
        effect_size: { type: 'number', description: 'Effect size (Cohen\'s d for t-test, f for ANOVA, w for chi-square, h for proportion)', required: true },
        alpha: { type: 'number', description: 'Significance level (default 0.05)' },
        power: { type: 'number', description: 'Desired power (default 0.8)' },
        solve_for: { type: 'string', description: 'What to solve for: n (sample size), power, or effect (effect size)', required: true },
    },
    tier: 'free',
    async execute(args) {
        const testType = String(args.test_type).toLowerCase().replace(/[_\s]/g, '_');
        // Fix: `Number(...) || 0.5` used to substitute 0.5 silently for a zero,
        // negative, or non-numeric effect size, producing misleading results.
        const effectSize = Number(args.effect_size);
        if (!Number.isFinite(effectSize) || effectSize <= 0) {
            return '**Error**: effect_size must be a positive number.';
        }
        // 0 is not a valid alpha/power, so `||` fallback is safe for defaults,
        // but out-of-range values must still be rejected.
        const alpha = Number(args.alpha) || 0.05;
        const power = Number(args.power) || 0.8;
        if (alpha <= 0 || alpha >= 1) {
            return '**Error**: alpha must be strictly between 0 and 1.';
        }
        if (power <= 0 || power >= 1) {
            return '**Error**: power must be strictly between 0 and 1.';
        }
        const solveFor = String(args.solve_for).toLowerCase();
        // Fix: an unknown solve_for in the t_test branch used to fall through
        // silently and emit no result section at all.
        if (solveFor !== 'n' && solveFor !== 'power' && solveFor !== 'effect') {
            return '**Error**: solve_for must be one of: n, power, effect.';
        }
        let out = `## Power Analysis\n\n`;
        out += `- **Test:** ${testType}\n`;
        // Two-sided critical z-value at level alpha.
        const zAlpha = normalQuantile(1 - alpha / 2);
        if (testType === 't_test') {
            // Two-sample t-test, normal approximation:
            // power ≈ Phi(d * sqrt(n/2) - z_{alpha/2}).
            if (solveFor === 'n') {
                const zBeta = normalQuantile(power);
                const n = Math.ceil(2 * ((zAlpha + zBeta) / effectSize) ** 2);
                out += `- **Effect Size (d):** ${fmt(effectSize, 4)}\n`;
                out += `- **Alpha:** ${alpha}\n`;
                out += `- **Power:** ${power}\n\n`;
                out += `### Result\n\n`;
                out += `**Required sample size per group: ${n}**\n`;
                out += `**Total sample size: ${2 * n}**\n`;
            }
            else if (solveFor === 'power') {
                // No n supplied in this schema, so tabulate power over a grid of n.
                out += `- **Effect Size (d):** ${fmt(effectSize, 4)}\n`;
                out += `- **Alpha:** ${alpha}\n\n`;
                out += `### Power for Various Sample Sizes\n\n`;
                out += `| n (per group) | Total N | Power |\n|---|---|---|\n`;
                for (const n of [10, 20, 30, 50, 75, 100, 150, 200, 300, 500]) {
                    const noncentrality = effectSize * Math.sqrt(n / 2);
                    const computedPower = 1 - normalCDF(zAlpha - noncentrality);
                    out += `| ${n} | ${2 * n} | ${fmt(computedPower, 4)} |\n`;
                }
            }
            else {
                // solveFor === 'effect': minimum detectable d over a grid of n.
                out += `- **Alpha:** ${alpha}\n`;
                out += `- **Power:** ${power}\n\n`;
                out += `### Minimum Detectable Effect for Various Sample Sizes\n\n`;
                out += `| n (per group) | Total N | Min Effect (d) | Interpretation |\n|---|---|---|---|\n`;
                const zBeta = normalQuantile(power);
                for (const n of [10, 20, 30, 50, 75, 100, 150, 200, 300, 500]) {
                    const d = (zAlpha + zBeta) / Math.sqrt(n / 2);
                    const interp = d >= 0.8 ? 'Large' : d >= 0.5 ? 'Medium' : d >= 0.2 ? 'Small' : 'Tiny';
                    out += `| ${n} | ${2 * n} | ${fmt(d, 4)} | ${interp} |\n`;
                }
            }
        }
        else if (testType === 'anova') {
            // One-way ANOVA with Cohen's f; k = 3 groups assumed (not a parameter).
            const k = 3;
            const df1 = k - 1;
            if (solveFor === 'n') {
                const zBeta = normalQuantile(power);
                // Approximation: n per group ~ (z_alpha + z_beta)^2 / (f^2 * k) + df1/2.
                const lambda = effectSize * effectSize; // noncentrality per observation
                const nPerGroup = Math.ceil(((zAlpha + zBeta) ** 2) / (lambda * k) + df1 / 2);
                const totalN = nPerGroup * k;
                out += `- **Effect Size (f):** ${fmt(effectSize, 4)}\n`;
                out += `- **Groups (k):** ${k}\n`;
                out += `- **Alpha:** ${alpha}\n`;
                out += `- **Power:** ${power}\n\n`;
                out += `### Result\n\n`;
                out += `**Required sample size per group: ~${nPerGroup}**\n`;
                out += `**Total sample size: ~${totalN}**\n`;
            }
            else if (solveFor === 'power') {
                out += `- **Effect Size (f):** ${fmt(effectSize, 4)}\n`;
                out += `- **Groups (k):** ${k}\n`;
                out += `- **Alpha:** ${alpha}\n\n`;
                out += `### Power for Various Sample Sizes\n\n`;
                out += `| n (per group) | Total N | Power |\n|---|---|---|\n`;
                for (const n of [10, 20, 30, 50, 75, 100, 150]) {
                    const lambda = n * k * effectSize * effectSize;
                    // Crude noncentral-F → normal approximation (one-sided critical value).
                    const ncp = Math.sqrt(lambda);
                    const critVal = normalQuantile(1 - alpha);
                    const computedPower = 1 - normalCDF(critVal - ncp);
                    out += `| ${n} | ${n * k} | ${fmt(Math.min(computedPower, 0.999), 4)} |\n`;
                }
            }
            else {
                const zBeta = normalQuantile(power);
                out += `- **Alpha:** ${alpha}\n`;
                out += `- **Power:** ${power}\n`;
                out += `- **Groups (k):** ${k}\n\n`;
                out += `### Minimum Detectable Effect (f) for Various Sample Sizes\n\n`;
                out += `| n (per group) | Total N | Min Effect (f) | Interpretation |\n|---|---|---|---|\n`;
                for (const n of [10, 20, 30, 50, 75, 100, 150, 200]) {
                    const f = (zAlpha + zBeta) / Math.sqrt(n * k);
                    const interp = f >= 0.4 ? 'Large' : f >= 0.25 ? 'Medium' : f >= 0.1 ? 'Small' : 'Tiny';
                    out += `| ${n} | ${n * k} | ${fmt(f, 4)} | ${interp} |\n`;
                }
            }
        }
        else if (testType === 'chi_square') {
            // Chi-square test with Cohen's w; df = 1 assumed (2x2 table).
            const df = 1; // default for 2x2
            if (solveFor === 'n') {
                const zBeta = normalQuantile(power);
                const n = Math.ceil(((zAlpha + zBeta) / effectSize) ** 2);
                out += `- **Effect Size (w):** ${fmt(effectSize, 4)}\n`;
                out += `- **Alpha:** ${alpha}\n`;
                out += `- **Power:** ${power}\n\n`;
                out += `### Result\n\n`;
                out += `**Required total sample size: ${n}**\n`;
            }
            else if (solveFor === 'power') {
                out += `- **Effect Size (w):** ${fmt(effectSize, 4)}\n`;
                out += `- **Alpha:** ${alpha}\n\n`;
                out += `### Power for Various Sample Sizes\n\n`;
                out += `| N | Power |\n|---|---|\n`;
                for (const n of [20, 50, 100, 200, 300, 500, 1000]) {
                    const ncp = n * effectSize * effectSize;
                    const critVal = normalQuantile(1 - alpha);
                    const computedPower = 1 - normalCDF(critVal - Math.sqrt(ncp));
                    out += `| ${n} | ${fmt(Math.min(computedPower, 0.999), 4)} |\n`;
                }
            }
            else {
                const zBeta = normalQuantile(power);
                out += `- **Alpha:** ${alpha}\n`;
                out += `- **Power:** ${power}\n\n`;
                out += `### Minimum Detectable Effect (w) for Various Sample Sizes\n\n`;
                out += `| N | Min Effect (w) | Interpretation |\n|---|---|---|\n`;
                for (const n of [20, 50, 100, 200, 300, 500, 1000]) {
                    const w = (zAlpha + zBeta) / Math.sqrt(n);
                    const interp = w >= 0.5 ? 'Large' : w >= 0.3 ? 'Medium' : w >= 0.1 ? 'Small' : 'Tiny';
                    out += `| ${n} | ${fmt(w, 4)} | ${interp} |\n`;
                }
            }
        }
        else if (testType === 'proportion') {
            // Two-proportion z-test with Cohen's h.
            if (solveFor === 'n') {
                const zBeta = normalQuantile(power);
                const n = Math.ceil(((zAlpha + zBeta) / effectSize) ** 2);
                out += `- **Effect Size (h):** ${fmt(effectSize, 4)}\n`;
                out += `- **Alpha:** ${alpha}\n`;
                out += `- **Power:** ${power}\n\n`;
                out += `### Result\n\n`;
                out += `**Required sample size per group: ${n}**\n`;
                out += `**Total sample size: ${2 * n}**\n`;
            }
            else if (solveFor === 'power') {
                out += `- **Effect Size (h):** ${fmt(effectSize, 4)}\n`;
                out += `- **Alpha:** ${alpha}\n\n`;
                out += `### Power for Various Sample Sizes\n\n`;
                out += `| n (per group) | Total N | Power |\n|---|---|---|\n`;
                for (const n of [20, 50, 100, 200, 300, 500, 1000]) {
                    const ncp = effectSize * Math.sqrt(n);
                    const computedPower = 1 - normalCDF(zAlpha - ncp);
                    out += `| ${n} | ${2 * n} | ${fmt(Math.min(computedPower, 0.999), 4)} |\n`;
                }
            }
            else {
                const zBeta = normalQuantile(power);
                out += `- **Alpha:** ${alpha}\n`;
                out += `- **Power:** ${power}\n\n`;
                out += `### Minimum Detectable Effect (h) for Various Sample Sizes\n\n`;
                out += `| n (per group) | Total N | Min Effect (h) | Interpretation |\n|---|---|---|---|\n`;
                for (const n of [20, 50, 100, 200, 300, 500, 1000]) {
                    const h = (zAlpha + zBeta) / Math.sqrt(n);
                    const interp = h >= 0.8 ? 'Large' : h >= 0.5 ? 'Medium' : h >= 0.2 ? 'Small' : 'Tiny';
                    out += `| ${n} | ${2 * n} | ${fmt(h, 4)} | ${interp} |\n`;
                }
            }
        }
        else {
            return `**Error**: Unknown test_type "${testType}". Use: t_test, anova, chi_square, proportion.`;
        }
        // Cohen's conventional benchmarks for reference.
        out += `\n### Effect Size Guidelines\n\n`;
        out += `| Test | Small | Medium | Large |\n|---|---|---|---|\n`;
        out += `| t-test (d) | 0.20 | 0.50 | 0.80 |\n`;
        out += `| ANOVA (f) | 0.10 | 0.25 | 0.40 |\n`;
        out += `| Chi-square (w) | 0.10 | 0.30 | 0.50 |\n`;
        out += `| Proportion (h) | 0.20 | 0.50 | 0.80 |\n`;
        return out;
    },
});
|
|
1606
|
+
// ── 8. ANOVA TEST ──
// One-way ANOVA: descriptives, ANOVA table, eta²/omega² effect sizes, and
// pairwise post-hoc comparisons with a conservative multiplicity adjustment.
registerTool({
    name: 'anova_test',
    description: 'One-way ANOVA with Tukey HSD and Bonferroni post-hoc tests. Returns F-statistic, p-value, eta-squared, and pairwise comparisons.',
    parameters: {
        groups: { type: 'string', description: 'Groups as JSON array of arrays (each inner array is one group\'s data)', required: true },
        test_type: { type: 'string', description: 'Test type: one_way', required: true },
        post_hoc: { type: 'string', description: 'Post-hoc test: tukey or bonferroni', required: true },
    },
    tier: 'free',
    async execute(args) {
        let groups;
        try {
            groups = JSON.parse(String(args.groups));
        }
        catch {
            return '**Error**: groups must be a valid JSON array of arrays.';
        }
        if (!Array.isArray(groups) || groups.length < 2) {
            return '**Error**: Need at least 2 groups.';
        }
        // Fix: a group with fewer than 2 values could drive dfWithin to 0,
        // turning every MS/SE/p-value below into NaN.
        if (!groups.every(g => Array.isArray(g) && g.length >= 2 && g.every(v => Number.isFinite(v)))) {
            return '**Error**: Each group must be a numeric array with at least 2 values.';
        }
        const postHoc = String(args.post_hoc).toLowerCase();
        // Fix: an unknown post_hoc value used to be silently labeled "Bonferroni".
        if (postHoc !== 'tukey' && postHoc !== 'bonferroni') {
            return '**Error**: post_hoc must be "tukey" or "bonferroni".';
        }
        const k = groups.length;
        const ns = groups.map(g => g.length);
        const N = sum(ns);
        const groupMeans = groups.map(g => mean(g));
        const grandMean = mean(groups.flat());
        // Between-group sum of squares: sum of n_i * (mean_i - grand mean)^2.
        let ssBetween = 0;
        for (let i = 0; i < k; i++) {
            ssBetween += ns[i] * (groupMeans[i] - grandMean) ** 2;
        }
        // Within-group sum of squares: residuals around each group mean.
        let ssWithin = 0;
        for (let i = 0; i < k; i++) {
            for (const val of groups[i]) {
                ssWithin += (val - groupMeans[i]) ** 2;
            }
        }
        const ssTotal = ssBetween + ssWithin;
        const dfBetween = k - 1;
        const dfWithin = N - k; // >= k with the >=2-per-group guard, so never 0
        const msBetween = ssBetween / dfBetween;
        const msWithin = ssWithin / dfWithin;
        const fStat = msWithin > 0 ? msBetween / msWithin : 0;
        const pValue = 1 - fCDF(fStat, dfBetween, dfWithin);
        const etaSquared = ssTotal > 0 ? ssBetween / ssTotal : 0;
        // Omega-squared: less biased effect size; may be negative for tiny effects
        // (clamped to 0 at display time).
        const omegaSquared = (ssBetween - dfBetween * msWithin) / (ssTotal + msWithin);
        let out = `## One-Way ANOVA\n\n`;
        out += `### Group Descriptives\n\n`;
        out += `| Group | N | Mean | Std Dev |\n|---|---|---|---|\n`;
        groups.forEach((g, i) => {
            out += `| ${i + 1} | ${g.length} | ${fmt(groupMeans[i], 4)} | ${fmt(stddev(g), 4)} |\n`;
        });
        out += `| **Total** | **${N}** | **${fmt(grandMean, 4)}** | **${fmt(stddev(groups.flat()), 4)}** |\n`;
        out += `\n### ANOVA Table\n\n`;
        out += `| Source | SS | df | MS | F | p-value |\n|---|---|---|---|---|---|\n`;
        out += `| Between | ${fmt(ssBetween, 4)} | ${dfBetween} | ${fmt(msBetween, 4)} | ${fmt(fStat, 4)} | ${pValue < 0.001 ? '<0.001' : fmt(pValue, 4)} |\n`;
        out += `| Within | ${fmt(ssWithin, 4)} | ${dfWithin} | ${fmt(msWithin, 4)} | | |\n`;
        out += `| Total | ${fmt(ssTotal, 4)} | ${N - 1} | | | |\n`;
        out += `\n### Effect Sizes\n\n`;
        out += `| Measure | Value | Interpretation |\n|---|---|---|\n`;
        const etaInterp = etaSquared >= 0.14 ? 'Large' : etaSquared >= 0.06 ? 'Medium' : 'Small';
        out += `| Eta-squared (eta²) | ${fmt(etaSquared, 4)} | ${etaInterp} |\n`;
        out += `| Omega-squared (omega²) | ${fmt(Math.max(0, omegaSquared), 4)} | — |\n`;
        // Post-hoc pairwise comparisons. NOTE: both the "tukey" and "bonferroni"
        // options use the same conservative adjustment (raw two-sided t p-value
        // multiplied by the number of comparisons); a true Tukey HSD would use the
        // Studentized range distribution, which is not implemented here.
        out += `\n### Post-Hoc: ${postHoc === 'tukey' ? 'Tukey HSD' : 'Bonferroni'}\n\n`;
        out += `| Comparison | Diff | SE | Statistic | p-value | Significant |\n|---|---|---|---|---|---|\n`;
        const nComparisons = k * (k - 1) / 2;
        for (let i = 0; i < k; i++) {
            for (let j = i + 1; j < k; j++) {
                const diff = groupMeans[i] - groupMeans[j];
                // Pooled standard error using the within-group mean square.
                const se = Math.sqrt(msWithin * (1 / ns[i] + 1 / ns[j]));
                const stat = Math.abs(diff) / se;
                const rawP = 2 * (1 - tCDF(stat, dfWithin));
                const pVal = Math.min(1, rawP * nComparisons);
                const sig = pVal < 0.001 ? '***' : pVal < 0.01 ? '**' : pVal < 0.05 ? '*' : 'ns';
                out += `| G${i + 1} vs G${j + 1} | ${fmt(diff, 4)} | ${fmt(se, 4)} | ${fmt(stat, 4)} | ${pVal < 0.001 ? '<0.001' : fmt(pVal, 4)} | ${sig} |\n`;
            }
        }
        out += `\nSignificance: \\*\\*\\* p<0.001, \\*\\* p<0.01, \\* p<0.05, ns = not significant\n`;
        return out;
    },
});
|
|
1702
|
+
// ── 9. SURVIVAL ANALYSIS ──
// Kaplan-Meier product-limit estimator (single group or 2+ groups) with
// Greenwood confidence intervals, median survival, and — when groups are
// supplied — a log-rank test and an O/E-based hazard ratio for 2 groups.
registerTool({
    name: 'survival_analysis',
    description: 'Kaplan-Meier survival curves with log-rank test. Returns survival probabilities at each time point, median survival, hazard ratios, and p-value.',
    parameters: {
        times: { type: 'string', description: 'Event/censoring times (comma-separated numbers)', required: true },
        events: { type: 'string', description: 'Event indicators (comma-separated: 1=event, 0=censored)', required: true },
        groups: { type: 'string', description: 'Group labels for each subject (comma-separated, optional for comparing 2+ groups)' },
    },
    tier: 'free',
    async execute(args) {
        const times = parseCSV(String(args.times));
        // Anything other than exactly 1 is coerced to 0 (censored).
        const events = parseCSV(String(args.events)).map(v => v === 1 ? 1 : 0);
        if (times.length !== events.length || times.length < 2) {
            return '**Error**: times and events must have equal length (minimum 2).';
        }
        // Optional per-subject group labels; null means single-group analysis.
        const groupLabels = args.groups
            ? String(args.groups).split(',').map(s => s.trim())
            : null;
        const n = times.length;
        // Kaplan-Meier estimator for a single group of (time, event) pairs.
        // Returns step-function arrays aligned by index (times/survival/nRisk/
        // nEvent/variance) plus the median survival time (null if never reached).
        function kaplanMeier(t, e) {
            // Sort subjects by time (argsort so events stay paired with times).
            const indices = t.map((_, i) => i).sort((a, b) => t[a] - t[b]);
            const sortedT = indices.map(i => t[i]);
            const sortedE = indices.map(i => e[i]);
            // Collapse tied times into (time, #events, #censored) triples.
            const uniqueTimes = [];
            const eventCounts = [];
            const censorCounts = [];
            let i = 0;
            while (i < sortedT.length) {
                const currentTime = sortedT[i];
                let nEvents = 0, nCensored = 0;
                while (i < sortedT.length && sortedT[i] === currentTime) {
                    if (sortedE[i] === 1)
                        nEvents++;
                    else
                        nCensored++;
                    i++;
                }
                if (nEvents > 0 || nCensored > 0) {
                    uniqueTimes.push(currentTime);
                    eventCounts.push(nEvents);
                    censorCounts.push(nCensored);
                }
            }
            // Product-limit estimator: S(t) starts at 1 with everyone at risk.
            const survTimes = [0];
            const survProbs = [1.0];
            const nRisk = [t.length];
            const nEvent = [0];
            const variances = [0];
            let atRisk = t.length;
            let survProb = 1.0;
            let greenwoodSum = 0; // running sum d / (r * (r - d)) for the variance
            for (let j = 0; j < uniqueTimes.length; j++) {
                const d = eventCounts[j];
                const c = censorCounts[j];
                // Only event times produce a step; pure-censoring times just
                // reduce the risk set below.
                if (d > 0) {
                    survProb *= (atRisk - d) / atRisk;
                    if (atRisk > d) {
                        greenwoodSum += d / (atRisk * (atRisk - d));
                    }
                    survTimes.push(uniqueTimes[j]);
                    survProbs.push(survProb);
                    nRisk.push(atRisk);
                    nEvent.push(d);
                    variances.push(survProb * survProb * greenwoodSum); // Greenwood's formula
                }
                // Both events and censorings leave the risk set after time j.
                atRisk -= d + c;
            }
            // Median survival: first time S(t) <= 0.5
            let medianSurvival = null;
            for (let j = 1; j < survProbs.length; j++) {
                if (survProbs[j] <= 0.5) {
                    medianSurvival = survTimes[j];
                    break;
                }
            }
            return {
                times: survTimes,
                survival: survProbs,
                nRisk,
                nEvent,
                variance: variances,
                medianSurvival,
            };
        }
        let out = `## Survival Analysis — Kaplan-Meier\n\n`;
        if (!groupLabels) {
            // Single group: survival table with 95% CI (normal approximation on
            // the Greenwood variance) and median survival.
            const km = kaplanMeier(times, events);
            const totalEvents = events.filter(e => e === 1).length;
            out += `**N = ${n}** | Events: ${totalEvents} | Censored: ${n - totalEvents}\n\n`;
            out += `### Survival Table\n\n`;
            out += `| Time | N at Risk | Events | S(t) | 95% CI |\n|---|---|---|---|---|\n`;
            for (let i = 0; i < km.times.length; i++) {
                const se = Math.sqrt(km.variance[i]);
                // Clamp the CI into [0, 1] since S(t) is a probability.
                const ciLower = Math.max(0, km.survival[i] - 1.96 * se);
                const ciUpper = Math.min(1, km.survival[i] + 1.96 * se);
                out += `| ${fmt(km.times[i], 2)} | ${km.nRisk[i]} | ${km.nEvent[i]} | ${fmt(km.survival[i], 4)} | [${fmt(ciLower, 4)}, ${fmt(ciUpper, 4)}] |\n`;
            }
            out += `\n**Median Survival:** ${km.medianSurvival !== null ? fmt(km.medianSurvival, 2) : 'Not reached'}\n`;
        }
        else {
            // Multiple groups — KM per group + log-rank test
            if (groupLabels.length !== n) {
                return `**Error**: groups length (${groupLabels.length}) must match times length (${n}).`;
            }
            // Partition subjects into per-group time/event vectors.
            const uniqueGroups = [...new Set(groupLabels)];
            const groupData = {};
            for (const g of uniqueGroups) {
                groupData[g] = { times: [], events: [] };
            }
            for (let i = 0; i < n; i++) {
                groupData[groupLabels[i]].times.push(times[i]);
                groupData[groupLabels[i]].events.push(events[i]);
            }
            // KM curve per group.
            const kmResults = {};
            for (const g of uniqueGroups) {
                kmResults[g] = kaplanMeier(groupData[g].times, groupData[g].events);
            }
            // Log-rank test: at every distinct event time (pooled across groups),
            // accumulate observed vs expected events per group, where expected is
            // proportional to each group's share of the risk set.
            const allEventTimes = [...new Set(times.filter((_, i) => events[i] === 1))].sort((a, b) => a - b);
            let chiSq = 0;
            const observed = {};
            const expected = {};
            for (const g of uniqueGroups) {
                observed[g] = 0;
                expected[g] = 0;
            }
            for (const t of allEventTimes) {
                // At each event time, count at-risk and events per group
                const atRiskPerGroup = {};
                const eventsPerGroup = {};
                let totalAtRisk = 0;
                let totalEvents = 0;
                for (const g of uniqueGroups) {
                    const gd = groupData[g];
                    let risk = 0, ev = 0;
                    for (let i = 0; i < gd.times.length; i++) {
                        // Still at risk if the subject's time is >= this event time.
                        if (gd.times[i] >= t)
                            risk++;
                        if (gd.times[i] === t && gd.events[i] === 1)
                            ev++;
                    }
                    atRiskPerGroup[g] = risk;
                    eventsPerGroup[g] = ev;
                    totalAtRisk += risk;
                    totalEvents += ev;
                }
                if (totalAtRisk === 0)
                    continue;
                for (const g of uniqueGroups) {
                    observed[g] += eventsPerGroup[g];
                    expected[g] += (atRiskPerGroup[g] / totalAtRisk) * totalEvents;
                }
            }
            // Log-rank chi-square = sum((O-E)^2 / E) with df = k-1
            for (const g of uniqueGroups) {
                if (expected[g] > 0) {
                    chiSq += (observed[g] - expected[g]) ** 2 / expected[g];
                }
            }
            const lrDf = uniqueGroups.length - 1;
            const lrPValue = 1 - chiSquareCDF(chiSq, lrDf);
            // Hazard ratio (for 2 groups): HR = (O1/E1) / (O2/E2)
            let hazardRatio = null;
            if (uniqueGroups.length === 2) {
                const g1 = uniqueGroups[0], g2 = uniqueGroups[1];
                if (expected[g1] > 0 && expected[g2] > 0) {
                    hazardRatio = (observed[g1] / expected[g1]) / (observed[g2] / expected[g2]);
                }
            }
            out += `### Group Summary\n\n`;
            out += `| Group | N | Events | Censored | Median Survival |\n|---|---|---|---|---|\n`;
            for (const g of uniqueGroups) {
                const gd = groupData[g];
                const nEvents = gd.events.filter(e => e === 1).length;
                out += `| ${g} | ${gd.times.length} | ${nEvents} | ${gd.times.length - nEvents} | ${kmResults[g].medianSurvival !== null ? fmt(kmResults[g].medianSurvival, 2) : 'NR'} |\n`;
            }
            out += `\n### Log-Rank Test\n\n`;
            out += `| Statistic | Value |\n|---|---|\n`;
            out += `| Chi-square | ${fmt(chiSq, 4)} |\n`;
            out += `| df | ${lrDf} |\n`;
            out += `| p-value | ${lrPValue < 0.001 ? '<0.001' : fmt(lrPValue, 4)} |\n`;
            if (hazardRatio !== null) {
                out += `| Hazard Ratio (${uniqueGroups[0]} vs ${uniqueGroups[1]}) | ${fmt(hazardRatio, 4)} |\n`;
            }
            out += `\n### Observed vs Expected Events\n\n`;
            out += `| Group | Observed | Expected | O/E |\n|---|---|---|---|\n`;
            for (const g of uniqueGroups) {
                out += `| ${g} | ${fmt(observed[g], 1)} | ${fmt(expected[g], 2)} | ${fmt(expected[g] > 0 ? observed[g] / expected[g] : 0, 4)} |\n`;
            }
            // Survival table for each group
            for (const g of uniqueGroups) {
                const km = kmResults[g];
                out += `\n### Survival Table — ${g}\n\n`;
                out += `| Time | N at Risk | Events | S(t) |\n|---|---|---|---|\n`;
                for (let i = 0; i < km.times.length; i++) {
                    out += `| ${fmt(km.times[i], 2)} | ${km.nRisk[i]} | ${km.nEvent[i]} | ${fmt(km.survival[i], 4)} |\n`;
                }
            }
        }
        return out;
    },
});
|
|
1912
|
+
// ── 10. VIZ CODEGEN ──
|
|
1913
|
+
registerTool({
    name: 'viz_codegen',
    description: 'Generate publication-quality plot code in Python (matplotlib/seaborn) or R (ggplot2). Supports: scatter, histogram, heatmap, boxplot, violin, line, bar, kaplan_meier, forest, volcano, qq plot types.',
    parameters: {
        chart_type: { type: 'string', description: 'Chart type: scatter, histogram, heatmap, boxplot, violin, line, bar, kaplan_meier, forest, volcano, qq', required: true },
        language: { type: 'string', description: 'Output language: python or r', required: true },
        data_description: { type: 'string', description: 'Description of the data (variable names, types, context)', required: true },
        title: { type: 'string', description: 'Plot title', required: true },
        style: { type: 'string', description: 'Style: publication or presentation (default publication)' },
    },
    tier: 'free',
    /**
     * Emit ready-to-run plotting code for the requested chart type, as a
     * markdown response containing one fenced code block.
     *
     * @param {object} args - tool arguments (see `parameters` above)
     * @returns {Promise<string>} markdown with the generated code, or an
     *   `**Error**` message for an unknown language / chart type
     */
    async execute(args) {
        // Normalize "kaplan meier" / "kaplan-meier" etc. to "kaplan_meier".
        const chartType = String(args.chart_type).toLowerCase().replace(/[_\s-]/g, '_');
        const language = String(args.language).toLowerCase();
        // Both values get embedded into single-line comments / string literals
        // of the generated code, so collapse any newlines to spaces.
        const dataDesc = String(args.data_description).replace(/\r?\n/g, ' ');
        const rawTitle = String(args.title).replace(/\r?\n/g, ' ');
        const style = String(args.style || 'publication').toLowerCase();
        if (language !== 'python' && language !== 'r') {
            return `**Error**: language must be "python" or "r".`;
        }
        // Validate the chart type once, up front (previously duplicated in
        // both language branches with the same message).
        const supported = new Set(['scatter', 'histogram', 'heatmap', 'boxplot', 'violin', 'line', 'bar', 'kaplan_meier', 'forest', 'volcano', 'qq']);
        if (!supported.has(chartType)) {
            return `**Error**: Unknown chart_type "${chartType}". Supported: scatter, histogram, heatmap, boxplot, violin, line, bar, kaplan_meier, forest, volcano, qq.`;
        }
        // Escape the title for embedding in a quoted string literal of the
        // target language (Python templates use '...', R templates use "...").
        // Without this, a quote or backslash in the title breaks the
        // generated code's syntax.
        const title = language === 'python'
            ? rawTitle.replace(/\\/g, '\\\\').replace(/'/g, "\\'")
            : rawTitle.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
        const isPub = style === 'publication';
        let code = '';
        if (language === 'python') {
            // Python: matplotlib + seaborn
            const preamble = [
                `import numpy as np`,
                `import pandas as pd`,
                `import matplotlib.pyplot as plt`,
                `import seaborn as sns`,
                `from matplotlib import rcParams`,
                ``,
                `# ${isPub ? 'Publication' : 'Presentation'} style`,
                isPub
                    ? `plt.style.use('seaborn-v0_8-whitegrid')\nrcParams.update({'font.family': 'serif', 'font.serif': ['Times New Roman'], 'font.size': 10, 'axes.labelsize': 11, 'axes.titlesize': 12, 'figure.dpi': 300, 'savefig.dpi': 300, 'figure.figsize': (6, 4)})`
                    : `plt.style.use('seaborn-v0_8-darkgrid')\nrcParams.update({'font.family': 'sans-serif', 'font.size': 14, 'axes.labelsize': 16, 'axes.titlesize': 18, 'figure.dpi': 150, 'figure.figsize': (10, 7)})`,
                ``,
                `# --- Data: ${dataDesc} ---`,
                `# Replace with your actual data`,
            ].join('\n');
            if (chartType === 'scatter') {
                code = `${preamble}
x = np.random.randn(100)
y = 2.5 * x + np.random.randn(100) * 0.5

fig, ax = plt.subplots()
scatter = ax.scatter(x, y, c='steelblue', alpha=0.7, edgecolors='white', s=50)

# Regression line
z = np.polyfit(x, y, 1)
p = np.poly1d(z)
x_line = np.linspace(x.min(), x.max(), 100)
ax.plot(x_line, p(x_line), 'r--', linewidth=1.5, label=f'y = {z[0]:.2f}x + {z[1]:.2f}')

ax.set_xlabel('X variable')
ax.set_ylabel('Y variable')
ax.set_title('${title}')
ax.legend(frameon=True)
plt.tight_layout()
plt.savefig('scatter_plot.png', bbox_inches='tight')
plt.show()`;
            }
            else if (chartType === 'histogram') {
                code = `${preamble}
data = np.random.randn(500)

fig, ax = plt.subplots()
n, bins, patches = ax.hist(data, bins=30, color='steelblue', edgecolor='white',
                           alpha=0.8, density=True)

# Overlay normal curve
from scipy import stats
xmin, xmax = ax.get_xlim()
x = np.linspace(xmin, xmax, 100)
mu, std = stats.norm.fit(data)
ax.plot(x, stats.norm.pdf(x, mu, std), 'r-', linewidth=2,
        label=f'Normal fit (mu={mu:.2f}, sigma={std:.2f})')

ax.set_xlabel('Value')
ax.set_ylabel('Density')
ax.set_title('${title}')
ax.legend(frameon=True)
plt.tight_layout()
plt.savefig('histogram.png', bbox_inches='tight')
plt.show()`;
            }
            else if (chartType === 'heatmap') {
                code = `${preamble}
data = np.random.randn(10, 10)
labels = [f'Var{i+1}' for i in range(10)]

fig, ax = plt.subplots(figsize=(8, 6))
im = sns.heatmap(data, annot=True, fmt='.2f', cmap='RdBu_r', center=0,
                 xticklabels=labels, yticklabels=labels,
                 linewidths=0.5, ax=ax, cbar_kws={'shrink': 0.8})
ax.set_title('${title}')
plt.tight_layout()
plt.savefig('heatmap.png', bbox_inches='tight')
plt.show()`;
            }
            else if (chartType === 'boxplot') {
                code = `${preamble}
groups = ['Group A', 'Group B', 'Group C', 'Group D']
data = [np.random.randn(50) + i for i, _ in enumerate(groups)]
df = pd.DataFrame({g: pd.Series(d) for g, d in zip(groups, data)})
df_melt = df.melt(var_name='Group', value_name='Value')

fig, ax = plt.subplots()
bp = sns.boxplot(data=df_melt, x='Group', y='Value', palette='Set2',
                 width=0.6, flierprops=dict(marker='o', markersize=4), ax=ax)
sns.stripplot(data=df_melt, x='Group', y='Value', color='black',
              alpha=0.3, size=3, jitter=True, ax=ax)

ax.set_xlabel('')
ax.set_ylabel('Value')
ax.set_title('${title}')
plt.tight_layout()
plt.savefig('boxplot.png', bbox_inches='tight')
plt.show()`;
            }
            else if (chartType === 'violin') {
                code = `${preamble}
groups = ['Group A', 'Group B', 'Group C']
data = [np.random.randn(100) + i * 0.5 for i, _ in enumerate(groups)]
df = pd.DataFrame({g: pd.Series(d) for g, d in zip(groups, data)})
df_melt = df.melt(var_name='Group', value_name='Value')

fig, ax = plt.subplots()
sns.violinplot(data=df_melt, x='Group', y='Value', palette='muted',
               inner='box', linewidth=1, ax=ax)

ax.set_xlabel('')
ax.set_ylabel('Value')
ax.set_title('${title}')
plt.tight_layout()
plt.savefig('violin_plot.png', bbox_inches='tight')
plt.show()`;
            }
            else if (chartType === 'line') {
                code = `${preamble}
x = np.arange(0, 50)
y1 = np.cumsum(np.random.randn(50)) + 10
y2 = np.cumsum(np.random.randn(50)) + 10

fig, ax = plt.subplots()
ax.plot(x, y1, '-o', color='steelblue', markersize=3, linewidth=1.5, label='Series A')
ax.plot(x, y2, '-s', color='coral', markersize=3, linewidth=1.5, label='Series B')
ax.fill_between(x, y1 - 1, y1 + 1, alpha=0.15, color='steelblue')
ax.fill_between(x, y2 - 1, y2 + 1, alpha=0.15, color='coral')

ax.set_xlabel('Time')
ax.set_ylabel('Value')
ax.set_title('${title}')
ax.legend(frameon=True)
plt.tight_layout()
plt.savefig('line_plot.png', bbox_inches='tight')
plt.show()`;
            }
            else if (chartType === 'bar') {
                code = `${preamble}
categories = ['Cat A', 'Cat B', 'Cat C', 'Cat D', 'Cat E']
values = [23, 45, 56, 78, 32]
errors = [3, 5, 4, 6, 3]

fig, ax = plt.subplots()
bars = ax.bar(categories, values, yerr=errors, capsize=4,
              color='steelblue', edgecolor='white', alpha=0.85)

# Add value labels
for bar, val in zip(bars, values):
    ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 2,
            str(val), ha='center', va='bottom', fontsize=9)

ax.set_ylabel('Value')
ax.set_title('${title}')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()
plt.savefig('bar_plot.png', bbox_inches='tight')
plt.show()`;
            }
            else if (chartType === 'kaplan_meier') {
                code = `${preamble}
from lifelines import KaplanMeierFitter
from lifelines.statistics import logrank_test

# Group 1
T1 = np.array([6, 6, 6, 7, 10, 13, 16, 22, 23, 6, 9, 10, 11, 17, 19, 20, 25, 32, 32, 34])
E1 = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

# Group 2
T2 = np.array([1, 1, 2, 2, 3, 4, 4, 5, 5, 8, 8, 8, 8, 11, 11, 12, 12, 15, 17, 22])
E2 = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0])

fig, ax = plt.subplots()
kmf1 = KaplanMeierFitter()
kmf1.fit(T1, E1, label='Treatment')
kmf1.plot_survival_function(ax=ax, ci_show=True, color='steelblue')

kmf2 = KaplanMeierFitter()
kmf2.fit(T2, E2, label='Control')
kmf2.plot_survival_function(ax=ax, ci_show=True, color='coral')

# Log-rank test
result = logrank_test(T1, T2, E1, E2)
ax.text(0.6, 0.9, f'Log-rank p = {result.p_value:.4f}',
        transform=ax.transAxes, fontsize=10,
        bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

ax.set_xlabel('Time')
ax.set_ylabel('Survival Probability')
ax.set_title('${title}')
ax.set_ylim(0, 1.05)
ax.legend(frameon=True)
plt.tight_layout()
plt.savefig('km_plot.png', bbox_inches='tight')
plt.show()`;
            }
            else if (chartType === 'forest') {
                code = `${preamble}
studies = ['Study A', 'Study B', 'Study C', 'Study D', 'Study E', 'Overall']
effects = [0.85, 1.12, 0.72, 0.95, 1.05, 0.92]
ci_lower = [0.65, 0.88, 0.55, 0.75, 0.82, 0.80]
ci_upper = [1.10, 1.42, 0.94, 1.20, 1.34, 1.06]
weights = [20, 25, 15, 22, 18, None]

fig, ax = plt.subplots(figsize=(8, 5))
y_pos = np.arange(len(studies))

for i, (study, eff, lo, hi) in enumerate(zip(studies, effects, ci_lower, ci_upper)):
    color = 'darkred' if study == 'Overall' else 'steelblue'
    marker = 'D' if study == 'Overall' else 'o'
    size = 10 if study == 'Overall' else 7
    ax.plot(eff, i, marker, color=color, markersize=size, zorder=3)
    ax.hlines(i, lo, hi, color=color, linewidth=2)
    label = f'{eff:.2f} [{lo:.2f}, {hi:.2f}]'
    ax.text(max(ci_upper) + 0.1, i, label, va='center', fontsize=9)

ax.axvline(1.0, color='gray', linestyle='--', linewidth=0.8)
ax.set_yticks(y_pos)
ax.set_yticklabels(studies)
ax.set_xlabel('Effect Size (OR / HR)')
ax.set_title('${title}')
ax.invert_yaxis()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()
plt.savefig('forest_plot.png', bbox_inches='tight')
plt.show()`;
            }
            else if (chartType === 'volcano') {
                code = `${preamble}
np.random.seed(42)
n_genes = 5000
log2fc = np.random.randn(n_genes) * 1.5
pvals = 10 ** (-np.abs(np.random.randn(n_genes) * 2))
neg_log10p = -np.log10(pvals)

# Categorize
fc_threshold = 1.0
p_threshold = 0.05
colors = []
for fc, p in zip(log2fc, pvals):
    if abs(fc) > fc_threshold and p < p_threshold:
        colors.append('red' if fc > 0 else 'blue')
    else:
        colors.append('gray')

fig, ax = plt.subplots()
ax.scatter(log2fc, neg_log10p, c=colors, alpha=0.5, s=10, edgecolors='none')
ax.axhline(-np.log10(p_threshold), color='gray', linestyle='--', linewidth=0.8)
ax.axvline(-fc_threshold, color='gray', linestyle='--', linewidth=0.8)
ax.axvline(fc_threshold, color='gray', linestyle='--', linewidth=0.8)

n_up = sum(1 for c in colors if c == 'red')
n_down = sum(1 for c in colors if c == 'blue')
ax.text(0.02, 0.98, f'Up: {n_up}\\nDown: {n_down}', transform=ax.transAxes,
        va='top', fontsize=9, bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

ax.set_xlabel('log2 Fold Change')
ax.set_ylabel('-log10(p-value)')
ax.set_title('${title}')
plt.tight_layout()
plt.savefig('volcano_plot.png', bbox_inches='tight')
plt.show()`;
            }
            else {
                // chartType === 'qq' (validated above)
                code = `${preamble}
from scipy import stats

data = np.random.randn(200) * 2 + 5  # Replace with your data

fig, ax = plt.subplots()
(osm, osr), (slope, intercept, r) = stats.probplot(data, dist='norm', plot=ax)
ax.get_lines()[0].set(color='steelblue', markersize=4, alpha=0.7)
ax.get_lines()[1].set(color='red', linewidth=1.5)

ax.set_title('${title}')
ax.text(0.05, 0.95, f'R² = {r**2:.4f}', transform=ax.transAxes,
        va='top', fontsize=10, bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
plt.tight_layout()
plt.savefig('qq_plot.png', bbox_inches='tight')
plt.show()`;
            }
        }
        else {
            // R: ggplot2
            const rPreamble = [
                `library(ggplot2)`,
                `library(dplyr)`,
                ``,
                `# ${isPub ? 'Publication' : 'Presentation'} theme`,
                isPub
                    ? `theme_pub <- theme_bw() + theme(text = element_text(family = "serif", size = 10), plot.title = element_text(size = 12, face = "bold"), axis.title = element_text(size = 11), legend.position = "bottom")`
                    : `theme_pub <- theme_minimal() + theme(text = element_text(size = 14), plot.title = element_text(size = 18, face = "bold"), axis.title = element_text(size = 16), legend.position = "bottom")`,
                ``,
                `# --- Data: ${dataDesc} ---`,
                `# Replace with your actual data`,
            ].join('\n');
            if (chartType === 'scatter') {
                // Fix: derive y from x so the fitted line actually reflects a
                // relationship (previously y used fresh rnorm draws, unrelated
                // to x, unlike the Python twin of this template).
                code = `${rPreamble}
set.seed(42)
x <- rnorm(100)
df <- data.frame(x = x, y = 2.5 * x + rnorm(100) * 0.5)

p <- ggplot(df, aes(x = x, y = y)) +
  geom_point(color = "steelblue", alpha = 0.7, size = 2) +
  geom_smooth(method = "lm", color = "red", linetype = "dashed", se = TRUE, alpha = 0.2) +
  labs(title = "${title}", x = "X variable", y = "Y variable") +
  theme_pub

ggsave("scatter_plot.pdf", p, width = 6, height = 4)
print(p)`;
            }
            else if (chartType === 'histogram') {
                code = `${rPreamble}
set.seed(42)
df <- data.frame(value = rnorm(500))

p <- ggplot(df, aes(x = value)) +
  geom_histogram(aes(y = after_stat(density)), bins = 30, fill = "steelblue",
                 color = "white", alpha = 0.8) +
  stat_function(fun = dnorm, args = list(mean = mean(df$value), sd = sd(df$value)),
                color = "red", linewidth = 1) +
  labs(title = "${title}", x = "Value", y = "Density") +
  theme_pub

ggsave("histogram.pdf", p, width = 6, height = 4)
print(p)`;
            }
            else if (chartType === 'heatmap') {
                code = `${rPreamble}
library(reshape2)

set.seed(42)
mat <- matrix(rnorm(100), nrow = 10)
colnames(mat) <- paste0("Var", 1:10)
rownames(mat) <- paste0("Var", 1:10)
df <- melt(mat)

p <- ggplot(df, aes(x = Var2, y = Var1, fill = value)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(value, 2)), size = 2.5) +
  scale_fill_gradient2(low = "steelblue", mid = "white", high = "coral", midpoint = 0) +
  labs(title = "${title}", x = "", y = "", fill = "Value") +
  theme_pub + theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggsave("heatmap.pdf", p, width = 8, height = 6)
print(p)`;
            }
            else if (chartType === 'boxplot') {
                code = `${rPreamble}
set.seed(42)
df <- data.frame(
  Group = rep(c("A", "B", "C", "D"), each = 50),
  Value = c(rnorm(50), rnorm(50, 1), rnorm(50, 2), rnorm(50, 3))
)

p <- ggplot(df, aes(x = Group, y = Value, fill = Group)) +
  geom_boxplot(width = 0.6, outlier.shape = 1, alpha = 0.8) +
  geom_jitter(width = 0.15, alpha = 0.3, size = 1) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "${title}", x = "", y = "Value") +
  theme_pub + theme(legend.position = "none")

ggsave("boxplot.pdf", p, width = 6, height = 4)
print(p)`;
            }
            else if (chartType === 'violin') {
                code = `${rPreamble}
set.seed(42)
df <- data.frame(
  Group = rep(c("A", "B", "C"), each = 100),
  Value = c(rnorm(100), rnorm(100, 0.5), rnorm(100, 1))
)

p <- ggplot(df, aes(x = Group, y = Value, fill = Group)) +
  geom_violin(trim = FALSE, alpha = 0.8) +
  geom_boxplot(width = 0.1, fill = "white", outlier.shape = NA) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(title = "${title}", x = "", y = "Value") +
  theme_pub + theme(legend.position = "none")

ggsave("violin_plot.pdf", p, width = 6, height = 4)
print(p)`;
            }
            else if (chartType === 'line') {
                code = `${rPreamble}
set.seed(42)
df <- data.frame(
  Time = rep(1:50, 2),
  Value = c(cumsum(rnorm(50)) + 10, cumsum(rnorm(50)) + 10),
  Series = rep(c("A", "B"), each = 50)
)

p <- ggplot(df, aes(x = Time, y = Value, color = Series)) +
  geom_line(linewidth = 0.8) +
  geom_point(size = 1, alpha = 0.5) +
  geom_ribbon(aes(ymin = Value - 1, ymax = Value + 1, fill = Series), alpha = 0.15, color = NA) +
  scale_color_manual(values = c("steelblue", "coral")) +
  scale_fill_manual(values = c("steelblue", "coral")) +
  labs(title = "${title}", x = "Time", y = "Value") +
  theme_pub

ggsave("line_plot.pdf", p, width = 6, height = 4)
print(p)`;
            }
            else if (chartType === 'bar') {
                code = `${rPreamble}
df <- data.frame(
  Category = c("Cat A", "Cat B", "Cat C", "Cat D", "Cat E"),
  Value = c(23, 45, 56, 78, 32),
  SE = c(3, 5, 4, 6, 3)
)

p <- ggplot(df, aes(x = reorder(Category, -Value), y = Value)) +
  geom_col(fill = "steelblue", alpha = 0.85, width = 0.65) +
  geom_errorbar(aes(ymin = Value - SE, ymax = Value + SE), width = 0.2) +
  geom_text(aes(label = Value), vjust = -0.5, size = 3.5) +
  labs(title = "${title}", x = "", y = "Value") +
  theme_pub

ggsave("bar_plot.pdf", p, width = 6, height = 4)
print(p)`;
            }
            else if (chartType === 'kaplan_meier') {
                code = `${rPreamble}
library(survival)
library(survminer)

df <- data.frame(
  time = c(6, 6, 6, 7, 10, 13, 16, 22, 23, 6, 9, 10, 11, 17, 19, 20, 25, 32, 32, 34,
           1, 1, 2, 2, 3, 4, 4, 5, 5, 8, 8, 8, 8, 11, 11, 12, 12, 15, 17, 22),
  event = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0),
  group = c(rep("Treatment", 20), rep("Control", 20))
)

fit <- survfit(Surv(time, event) ~ group, data = df)
p <- ggsurvplot(fit, data = df, pval = TRUE, conf.int = TRUE,
                risk.table = TRUE, palette = c("steelblue", "coral"),
                title = "${title}",
                xlab = "Time", ylab = "Survival Probability",
                ggtheme = theme_bw() + theme(text = element_text(family = "serif")))

ggsave("km_plot.pdf", plot = print(p), width = 8, height = 6)`;
            }
            else if (chartType === 'forest') {
                // Fix: the previous template ordered studies with
                // reorder(study, desc(row_number())), but dplyr::row_number()
                // errors outside a dplyr verb; an explicit reversed factor
                // achieves the same top-to-bottom ordering reliably.
                code = `${rPreamble}
library(forestplot)

df <- data.frame(
  study = c("Study A", "Study B", "Study C", "Study D", "Study E", "Overall"),
  estimate = c(0.85, 1.12, 0.72, 0.95, 1.05, 0.92),
  lower = c(0.65, 0.88, 0.55, 0.75, 0.82, 0.80),
  upper = c(1.10, 1.42, 0.94, 1.20, 1.34, 1.06)
)
df$study <- factor(df$study, levels = rev(df$study))

p <- ggplot(df, aes(y = study, x = estimate, xmin = lower, xmax = upper)) +
  geom_pointrange(aes(color = study == "Overall"), size = 0.8) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "gray50") +
  scale_color_manual(values = c("steelblue", "darkred"), guide = "none") +
  labs(title = "${title}", x = "Effect Size (OR / HR)", y = "") +
  theme_pub

ggsave("forest_plot.pdf", p, width = 8, height = 5)
print(p)`;
            }
            else if (chartType === 'volcano') {
                code = `${rPreamble}
set.seed(42)
df <- data.frame(
  log2fc = rnorm(5000) * 1.5,
  pvalue = 10^(-abs(rnorm(5000) * 2))
) %>%
  mutate(
    neg_log10p = -log10(pvalue),
    category = case_when(
      abs(log2fc) > 1 & pvalue < 0.05 & log2fc > 0 ~ "Up",
      abs(log2fc) > 1 & pvalue < 0.05 & log2fc < 0 ~ "Down",
      TRUE ~ "NS"
    )
  )

p <- ggplot(df, aes(x = log2fc, y = neg_log10p, color = category)) +
  geom_point(alpha = 0.5, size = 1) +
  scale_color_manual(values = c("Down" = "blue", "NS" = "gray70", "Up" = "red")) +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "gray50") +
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "gray50") +
  labs(title = "${title}", x = "log2 Fold Change", y = "-log10(p-value)", color = "") +
  theme_pub

ggsave("volcano_plot.pdf", p, width = 6, height = 5)
print(p)`;
            }
            else {
                // chartType === 'qq' (validated above)
                code = `${rPreamble}
set.seed(42)
data <- rnorm(200) * 2 + 5 # Replace with your data

df <- data.frame(value = data)

p <- ggplot(df, aes(sample = value)) +
  stat_qq(color = "steelblue", alpha = 0.7, size = 2) +
  stat_qq_line(color = "red", linewidth = 1) +
  labs(title = "${title}", x = "Theoretical Quantiles", y = "Sample Quantiles") +
  theme_pub

shapiro <- shapiro.test(data)
p <- p + annotate("text", x = -Inf, y = Inf, hjust = -0.1, vjust = 1.5,
                  label = paste0("Shapiro-Wilk p = ", format.pval(shapiro$p.value, digits = 4)),
                  size = 3.5)

ggsave("qq_plot.pdf", p, width = 6, height = 4)
print(p)`;
            }
        }
        // Wrap the generated source in a markdown response.
        let out = `## Visualization Code — ${chartType} (${language === 'python' ? 'Python' : 'R'})\n\n`;
        out += `**Style:** ${isPub ? 'Publication' : 'Presentation'} | **Data:** ${dataDesc}\n\n`;
        out += '```' + (language === 'python' ? 'python' : 'r') + '\n';
        out += code + '\n';
        out += '```\n';
        out += `\n*Copy this code and replace the sample data with your actual data. The plot will be saved to the working directory.*\n`;
        return out;
    },
});
|
|
2463
|
+
}
|
|
2464
|
+
//# sourceMappingURL=lab-data.js.map
|