@kernel.chat/kbot 3.41.0 → 3.42.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +5 -5
  2. package/dist/agent-teams.d.ts +1 -1
  3. package/dist/agent-teams.d.ts.map +1 -1
  4. package/dist/agent-teams.js +36 -3
  5. package/dist/agent-teams.js.map +1 -1
  6. package/dist/agents/specialists.d.ts.map +1 -1
  7. package/dist/agents/specialists.js +20 -0
  8. package/dist/agents/specialists.js.map +1 -1
  9. package/dist/channels/kbot-channel.js +8 -31
  10. package/dist/channels/kbot-channel.js.map +1 -1
  11. package/dist/cli.js +8 -8
  12. package/dist/digest.js +1 -1
  13. package/dist/digest.js.map +1 -1
  14. package/dist/email-service.d.ts.map +1 -1
  15. package/dist/email-service.js +1 -2
  16. package/dist/email-service.js.map +1 -1
  17. package/dist/episodic-memory.d.ts.map +1 -1
  18. package/dist/episodic-memory.js +14 -0
  19. package/dist/episodic-memory.js.map +1 -1
  20. package/dist/learned-router.d.ts.map +1 -1
  21. package/dist/learned-router.js +29 -0
  22. package/dist/learned-router.js.map +1 -1
  23. package/dist/tools/email.d.ts.map +1 -1
  24. package/dist/tools/email.js +2 -3
  25. package/dist/tools/email.js.map +1 -1
  26. package/dist/tools/index.d.ts.map +1 -1
  27. package/dist/tools/index.js +7 -1
  28. package/dist/tools/index.js.map +1 -1
  29. package/dist/tools/lab-bio.d.ts +2 -0
  30. package/dist/tools/lab-bio.d.ts.map +1 -0
  31. package/dist/tools/lab-bio.js +1392 -0
  32. package/dist/tools/lab-bio.js.map +1 -0
  33. package/dist/tools/lab-chem.d.ts +2 -0
  34. package/dist/tools/lab-chem.d.ts.map +1 -0
  35. package/dist/tools/lab-chem.js +1257 -0
  36. package/dist/tools/lab-chem.js.map +1 -0
  37. package/dist/tools/lab-core.d.ts +2 -0
  38. package/dist/tools/lab-core.d.ts.map +1 -0
  39. package/dist/tools/lab-core.js +2452 -0
  40. package/dist/tools/lab-core.js.map +1 -0
  41. package/dist/tools/lab-data.d.ts +2 -0
  42. package/dist/tools/lab-data.d.ts.map +1 -0
  43. package/dist/tools/lab-data.js +2464 -0
  44. package/dist/tools/lab-data.js.map +1 -0
  45. package/dist/tools/lab-earth.d.ts +2 -0
  46. package/dist/tools/lab-earth.d.ts.map +1 -0
  47. package/dist/tools/lab-earth.js +1124 -0
  48. package/dist/tools/lab-earth.js.map +1 -0
  49. package/dist/tools/lab-math.d.ts +2 -0
  50. package/dist/tools/lab-math.d.ts.map +1 -0
  51. package/dist/tools/lab-math.js +3021 -0
  52. package/dist/tools/lab-math.js.map +1 -0
  53. package/dist/tools/lab-physics.d.ts +2 -0
  54. package/dist/tools/lab-physics.d.ts.map +1 -0
  55. package/dist/tools/lab-physics.js +2423 -0
  56. package/dist/tools/lab-physics.js.map +1 -0
  57. package/package.json +2 -3
@@ -0,0 +1,2464 @@
1
+ // kbot Data Analysis & Statistics Tools — Regression, Bayesian, time series, PCA, and more.
2
+ // All computations are pure TypeScript — zero external dependencies.
3
+ // Implements numerical methods: normal equations, eigendecomposition, MLE, KS test, etc.
4
+ import { registerTool } from './index.js';
5
+ // ══════════════════════════════════════════════════════════════════════════════
6
+ // SHARED MATH UTILITIES
7
+ // ══════════════════════════════════════════════════════════════════════════════
8
/** Render a number with a fixed count of decimal digits; non-finite values pass through as their string form. */
function fmt(n, d = 4) {
    return isFinite(n) ? n.toFixed(d) : String(n);
}
14
/** Arithmetic mean of an array; defined as 0 for empty input. */
function mean(arr) {
    if (arr.length === 0) {
        return 0;
    }
    let total = 0;
    for (const v of arr) {
        total += v;
    }
    return total / arr.length;
}
20
/** Unbiased (sample) variance — divides by n-1; 0 when fewer than two observations. */
function variance(arr) {
    const n = arr.length;
    if (n < 2) {
        return 0;
    }
    // Inlined mean so the computation is self-contained.
    const m = arr.reduce((s, v) => s + v, 0) / n;
    let ss = 0;
    for (const v of arr) {
        ss += (v - m) * (v - m);
    }
    return ss / (n - 1);
}
27
/** Sample standard deviation — square root of the unbiased variance. */
function stddev(arr) {
    const v = variance(arr);
    return Math.sqrt(v);
}
31
/**
 * Median of a numeric array.
 * Fix: an empty array previously produced NaN (it indexed sorted[-1]);
 * now returns 0 for consistency with mean() and variance(), which also
 * return 0 on degenerate input.
 */
function median(arr) {
    if (arr.length === 0)
        return 0;
    // Copy before sorting — Array.prototype.sort mutates in place.
    const sorted = [...arr].sort((a, b) => a - b);
    const mid = Math.floor(sorted.length / 2);
    // Odd count: middle element; even count: average of the two central elements.
    return sorted.length % 2 !== 0 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
}
37
/** Sum of all elements; 0 for an empty array. */
function sum(arr) {
    let total = 0;
    for (const v of arr) {
        total += v;
    }
    return total;
}
41
/**
 * Standard normal CDF via the Abramowitz & Stegun 26.2.17 rational
 * approximation of erf (absolute error below ~7.5e-8).
 */
function normalCDF(x) {
    if (x === Infinity)
        return 1;
    if (x === -Infinity)
        return 0;
    const p = 0.3275911;
    // a1..a5 of the A&S polynomial, low degree first (Horner runs high-to-low).
    const coeffs = [0.254829592, -0.284496736, 1.421413741, -1.453152027, 1.061405429];
    const z = Math.abs(x) / Math.SQRT2;
    const t = 1.0 / (1.0 + p * z);
    let poly = 0;
    for (let i = coeffs.length - 1; i >= 0; i--) {
        poly = poly * t + coeffs[i];
    }
    const y = 1.0 - poly * t * Math.exp(-z * z);
    // Symmetry: Phi(-x) = 1 - Phi(x).
    return x < 0 ? 0.5 * (1.0 - y) : 0.5 * (1.0 + y);
}
59
/** Probability density of the standard normal distribution at x. */
function normalPDF(x) {
    const twoPi = 2 * Math.PI;
    return Math.exp(-0.5 * x * x) / Math.sqrt(twoPi);
}
63
/**
 * Inverse CDF (quantile) of the standard normal distribution.
 * Peter Acklam's rational approximation: a central rational polynomial in
 * r = (p - 0.5)^2 for 0.02425 <= p <= 0.97575, and a tail formula in
 * q = sqrt(-2 ln p) (mirrored for the upper tail) elsewhere.
 */
function normalQuantile(p) {
    if (p <= 0)
        return -Infinity;
    if (p >= 1)
        return Infinity;
    if (p === 0.5)
        return 0;
    // Coefficient sets for the central (a/b) and tail (c/d) rationals,
    // highest degree first so a simple Horner fold applies.
    const a = [
        -3.969683028665376e1, 2.209460984245205e2,
        -2.759285104469687e2, 1.383577518672690e2,
        -3.066479806614716e1, 2.506628277459239e0,
    ];
    const b = [
        -5.447609879822406e1, 1.615858368580409e2,
        -1.556989798598866e2, 6.680131188771972e1,
        -1.328068155288572e1,
    ];
    const c = [
        -7.784894002430293e-3, -3.223964580411365e-1,
        -2.400758277161838e0, -2.549732539343734e0,
        4.374664141464968e0, 2.938163982698783e0,
    ];
    const d = [
        7.784695709041462e-3, 3.224671290700398e-1,
        2.445134137142996e0, 3.754408661907416e0,
    ];
    const horner = (coef, v) => coef.reduce((acc, ck) => acc * v + ck, 0);
    const pLow = 0.02425;
    const pHigh = 1 - pLow;
    if (p < pLow) {
        // Lower tail.
        const q = Math.sqrt(-2 * Math.log(p));
        return horner(c, q) / (horner(d, q) * q + 1);
    }
    if (p <= pHigh) {
        // Central region.
        const q = p - 0.5;
        const r = q * q;
        return (horner(a, r) * q) / (horner(b, r) * r + 1);
    }
    // Upper tail — mirror of the lower-tail formula.
    const q = Math.sqrt(-2 * Math.log(1 - p));
    return -(horner(c, q) / (horner(d, q) * q + 1));
}
112
/**
 * Natural logarithm of the gamma function, Lanczos approximation with
 * g = 7 and 9 coefficients. Returns Infinity for x <= 0 (no reflection
 * formula is applied for the negative axis).
 */
function logGamma(x) {
    if (x <= 0)
        return Infinity;
    const g = 7;
    const coef = [
        0.99999999999980993, 676.5203681218851, -1259.1392167224028,
        771.32342877765313, -176.61502916214059, 12.507343278686905,
        -0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7,
    ];
    // Partial-fraction series A_g(x).
    let acc = coef[0];
    for (let i = 1; i < g + 2; i++) {
        acc += coef[i] / (x + i - 1);
    }
    const t = x + g - 0.5;
    // ln Γ(x) = ln√(2π) + (x-1/2) ln t - t + ln A_g(x)
    return 0.5 * Math.log(2 * Math.PI) + (x - 0.5) * Math.log(t) - t + Math.log(acc);
}
129
/** Gamma function, recovered by exponentiating logGamma. */
function gamma(x) {
    const lg = logGamma(x);
    return Math.exp(lg);
}
133
/**
 * Regularized lower incomplete gamma function P(a, x).
 * Series expansion for x < a + 1 (its fast-convergence region); otherwise
 * computed as the complement of the continued-fraction Q(a, x).
 * Conventions: P = 0 for x <= 0; treated as 1 when a <= 0.
 */
function lowerIncompleteGammaP(a, x) {
    if (x <= 0)
        return 0;
    if (a <= 0)
        return 1;
    if (x < a + 1) {
        // Series: P(a,x) = x^a e^{-x} / Γ(a) · Σ x^n / (a(a+1)…(a+n)).
        let term = 1 / a;
        let acc = term;
        for (let n = 1; n < 200; n++) {
            term *= x / (a + n);
            acc += term;
            if (Math.abs(term) < 1e-14 * Math.abs(acc))
                break; // converged
        }
        return acc * Math.exp(-x + a * Math.log(x) - logGamma(a));
    }
    // Large-x region: complement of the upper function.
    return 1 - upperIncompleteGammaQ(a, x);
}
156
/**
 * Regularized upper incomplete gamma function Q(a, x), evaluated via its
 * continued-fraction expansion using the modified Lentz algorithm
 * (cf. Numerical Recipes `gcf`). By the conventions here Q = 1 for x <= 0.
 */
function upperIncompleteGammaQ(a, x) {
    if (x < 0)
        return 1;
    if (x === 0)
        return 1;
    // Continued fraction via Lentz's method
    let f = x + 1 - a; // b0 term of the continued fraction
    if (Math.abs(f) < 1e-30)
        f = 1e-30; // Lentz guard: avoid division by ~0
    let C = f;
    let D = 0;
    for (let i = 1; i < 200; i++) {
        const an = i * (a - i); // numerator coefficient a_i = -i(i - a)
        const bn = x + 2 * i + 1 - a; // denominator coefficient b_i
        D = bn + an * D;
        if (Math.abs(D) < 1e-30)
            D = 1e-30;
        C = bn + an / C;
        if (Math.abs(C) < 1e-30)
            C = 1e-30;
        D = 1 / D;
        const delta = C * D;
        f *= delta;
        if (Math.abs(delta - 1) < 1e-14)
            break; // converged
    }
    // Prefactor x^a e^{-x} / Γ(a), divided by the continued-fraction value.
    return Math.exp(-x + a * Math.log(x) - logGamma(a)) / f;
}
185
/** CDF of the chi-square distribution with df degrees of freedom: P(df/2, x/2). */
function chiSquareCDF(x, df) {
    return x <= 0 ? 0 : lowerIncompleteGammaP(df / 2, x / 2);
}
191
/**
 * Regularized incomplete beta function I_x(a, b).
 * Applies the symmetry relation I_x(a,b) = 1 - I_{1-x}(b,a) so the
 * continued fraction is always evaluated in its rapidly-converging region
 * (x <= (a+1)/(a+b+2)), then uses the modified Lentz algorithm
 * (cf. Numerical Recipes `betacf`).
 */
function incompleteBeta(x, a, b) {
    if (x <= 0)
        return 0;
    if (x >= 1)
        return 1;
    // Symmetry relation for numerical stability
    if (x > (a + 1) / (a + b + 2)) {
        return 1 - incompleteBeta(1 - x, b, a);
    }
    // Prefactor x^a (1-x)^b / (a · B(a,b)), assembled in log space.
    const lnBeta = logGamma(a) + logGamma(b) - logGamma(a + b);
    const front = Math.exp(a * Math.log(x) + b * Math.log(1 - x) - lnBeta) / a;
    // Lentz's continued fraction
    let f = 1, C = 1, D = 0;
    for (let m = 0; m <= 200; m++) {
        let numerator;
        if (m === 0) {
            numerator = 1; // first convergent
        }
        else if (m % 2 === 0) {
            // Even terms: d_{2k} = k(b-k)x / ((a+2k-1)(a+2k))
            const k = m / 2;
            numerator = (k * (b - k) * x) / ((a + 2 * k - 1) * (a + 2 * k));
        }
        else {
            // Odd terms: d_{2k+1} = -(a+k)(a+b+k)x / ((a+2k)(a+2k+1))
            const k = (m - 1) / 2;
            numerator = -((a + k) * (a + b + k) * x) / ((a + 2 * k) * (a + 2 * k + 1));
        }
        D = 1 + numerator * D;
        if (Math.abs(D) < 1e-30)
            D = 1e-30; // Lentz guard against vanishing denominators
        D = 1 / D;
        C = 1 + numerator / C;
        if (Math.abs(C) < 1e-30)
            C = 1e-30;
        f *= C * D;
        if (Math.abs(C * D - 1) < 1e-14)
            break; // converged
    }
    return front * f;
}
231
/**
 * CDF of Student's t distribution with df degrees of freedom, expressed
 * through the regularized incomplete beta function. NaN for df <= 0.
 */
function tCDF(t, df) {
    if (df <= 0)
        return NaN;
    const x = df / (df + t * t);
    const halfTail = 0.5 * incompleteBeta(x, df / 2, 0.5);
    // The beta term gives twice the tail mass beyond |t|.
    return t >= 0 ? 1 - halfTail : halfTail;
}
244
/** CDF of the F distribution with (df1, df2) degrees of freedom. */
function fCDF(x, df1, df2) {
    if (x <= 0)
        return 0;
    const scaled = df1 * x;
    return incompleteBeta(scaled / (scaled + df2), df1 / 2, df2 / 2);
}
251
/** Parse a comma-separated string into finite numbers, silently dropping non-numeric entries. */
function parseCSV(s) {
    const out = [];
    for (const piece of s.split(',')) {
        const num = parseFloat(piece.trim());
        if (isFinite(num)) {
            out.push(num);
        }
    }
    return out;
}
255
/**
 * 1-based ranks for each value, with tied values receiving the average of
 * the ranks they span (fractional "midrank" convention).
 */
function rank(data) {
    const order = data
        .map((v, i) => ({ v, i }))
        .sort((a, b) => a.v - b.v);
    const ranks = new Array(data.length);
    let start = 0;
    while (start < order.length) {
        // Walk to the end of the run of equal values beginning at `start`.
        let end = start;
        while (end < order.length && order[end].v === order[start].v) {
            end++;
        }
        const avgRank = (start + end + 1) / 2; // mean of 1-based ranks start+1 … end
        for (let k = start; k < end; k++) {
            ranks[order[k].i] = avgRank;
        }
        start = end;
    }
    return ranks;
}
272
/** Build a rows×cols matrix with every entry set to `fill` (default 0). */
function matCreate(rows, cols, fill = 0) {
    const M = [];
    for (let r = 0; r < rows; r++) {
        M.push(new Array(cols).fill(fill));
    }
    return M;
}
275
/** Transpose of a (rectangular, non-empty) matrix. */
function matTranspose(A) {
    const rows = A.length;
    const cols = A[0].length;
    return Array.from({ length: cols }, (_, j) =>
        Array.from({ length: rows }, (_, i) => A[i][j]));
}
283
/** Matrix product A·B, where A is aRows×aCols and B is aCols×bCols. */
function matMul(A, B) {
    const aRows = A.length;
    const aCols = A[0].length;
    const bCols = B[0].length;
    const C = Array.from({ length: aRows }, () => new Array(bCols).fill(0));
    // i-k-j loop order: for each (i, j) cell the k-terms are still added in
    // ascending k order, so results match the naive triple loop exactly.
    for (let i = 0; i < aRows; i++) {
        for (let k = 0; k < aCols; k++) {
            const aik = A[i][k];
            for (let j = 0; j < bCols; j++) {
                C[i][j] += aik * B[k][j];
            }
        }
    }
    return C;
}
292
/**
 * Solve the linear system A·x = b by Gaussian elimination with partial
 * (row) pivoting, then back substitution.
 * Throws when a pivot magnitude falls below 1e-12 (singular or nearly so).
 * Inputs A and b are not mutated.
 */
function matSolve(A, b) {
    const n = A.length;
    // Work on an augmented copy [A | b].
    const work = A.map((row, i) => [...row, b[i]]);
    for (let k = 0; k < n; k++) {
        // Pick the row with the largest magnitude entry in column k.
        let pivotRow = k;
        for (let r = k + 1; r < n; r++) {
            if (Math.abs(work[r][k]) > Math.abs(work[pivotRow][k])) {
                pivotRow = r;
            }
        }
        const swap = work[k];
        work[k] = work[pivotRow];
        work[pivotRow] = swap;
        if (Math.abs(work[k][k]) < 1e-12) {
            throw new Error('Matrix is singular or nearly singular');
        }
        // Zero out column k below the pivot.
        for (let r = k + 1; r < n; r++) {
            const factor = work[r][k] / work[k][k];
            for (let j = k; j <= n; j++) {
                work[r][j] -= factor * work[k][j];
            }
        }
    }
    // Back substitution on the upper-triangular system.
    const x = new Array(n).fill(0);
    for (let i = n - 1; i >= 0; i--) {
        let acc = work[i][n];
        for (let j = i + 1; j < n; j++) {
            acc -= work[i][j] * x[j];
        }
        x[i] = acc / work[i][i];
    }
    return x;
}
326
/**
 * Inverse of a square matrix via Gauss-Jordan elimination with partial
 * pivoting on the augmented system [A | I].
 * Throws 'Singular matrix' when a pivot magnitude drops below 1e-12.
 * The argument matrix is not mutated.
 */
function matInverse(A) {
    const n = A.length;
    const work = A.map((row, i) =>
        row.concat(Array.from({ length: n }, (_, j) => (j === i ? 1 : 0))));
    for (let k = 0; k < n; k++) {
        // Partial pivoting: largest magnitude entry in column k.
        let pivotRow = k;
        for (let r = k + 1; r < n; r++) {
            if (Math.abs(work[r][k]) > Math.abs(work[pivotRow][k])) {
                pivotRow = r;
            }
        }
        const swap = work[k];
        work[k] = work[pivotRow];
        work[pivotRow] = swap;
        if (Math.abs(work[k][k]) < 1e-12) {
            throw new Error('Singular matrix');
        }
        // Normalize the pivot row, then clear column k in every other row.
        const pivot = work[k][k];
        for (let j = 0; j < 2 * n; j++) {
            work[k][j] /= pivot;
        }
        for (let r = 0; r < n; r++) {
            if (r === k) {
                continue;
            }
            const factor = work[r][k];
            for (let j = 0; j < 2 * n; j++) {
                work[r][j] -= factor * work[k][j];
            }
        }
    }
    // The right half of the augmented matrix now holds A^{-1}.
    return work.map(row => row.slice(n));
}
357
/**
 * Eigen-decomposition of a real symmetric matrix by the classical Jacobi
 * rotation method. Returns { values, vectors } where column j of `vectors`
 * is the (orthonormal) eigenvector for values[j]. The input is not mutated.
 *
 * Bug fix: the rotation parameter previously used Math.sign(theta), which is
 * 0 when the two pivot diagonal entries are equal (theta === 0). That made
 * t = 0 — an identity rotation — while S[p][q] was still unconditionally
 * zeroed, silently discarding off-diagonal mass and producing wrong
 * eigenvalues (e.g. [[1,1],[1,1]] reported [1,1] instead of [0,2]).
 * The Jacobi convention takes sign(0) = +1, yielding the 45° rotation that
 * actually annihilates the element.
 */
function symmetricEigen(A) {
    const n = A.length;
    const S = A.map(row => [...row]);
    // V accumulates the product of all rotations -> eigenvector matrix.
    const V = Array.from({ length: n }, (_, i) =>
        Array.from({ length: n }, (_, j) => (i === j ? 1 : 0)));
    const maxIter = 100 * n * n;
    for (let iter = 0; iter < maxIter; iter++) {
        // Find largest off-diagonal element S[p][q] (classical Jacobi pivot).
        let maxVal = 0, p = 0, q = 1;
        for (let i = 0; i < n; i++) {
            for (let j = i + 1; j < n; j++) {
                if (Math.abs(S[i][j]) > maxVal) {
                    maxVal = Math.abs(S[i][j]);
                    p = i;
                    q = j;
                }
            }
        }
        if (maxVal < 1e-12)
            break; // converged: matrix is numerically diagonal
        // Stable rotation formula; sign(0) must be +1 so theta === 0 still
        // gives t = 1 (a 45° rotation) rather than t = 0.
        const theta = (S[q][q] - S[p][p]) / (2 * S[p][q]);
        const signTheta = theta >= 0 ? 1 : -1;
        const t = signTheta / (Math.abs(theta) + Math.sqrt(theta * theta + 1));
        const c = 1 / Math.sqrt(t * t + 1);
        const s = t * c;
        // Apply the rotation to S; only rows/columns p and q change.
        const Spp = S[p][p], Sqq = S[q][q], Spq = S[p][q];
        S[p][p] = c * c * Spp - 2 * s * c * Spq + s * s * Sqq;
        S[q][q] = s * s * Spp + 2 * s * c * Spq + c * c * Sqq;
        S[p][q] = 0;
        S[q][p] = 0;
        for (let i = 0; i < n; i++) {
            if (i !== p && i !== q) {
                const Sip = S[i][p], Siq = S[i][q];
                S[i][p] = c * Sip - s * Siq;
                S[p][i] = S[i][p];
                S[i][q] = s * Sip + c * Siq;
                S[q][i] = S[i][q];
            }
        }
        // Accumulate eigenvectors
        for (let i = 0; i < n; i++) {
            const Vip = V[i][p], Viq = V[i][q];
            V[i][p] = c * Vip - s * Viq;
            V[i][q] = s * Vip + c * Viq;
        }
    }
    const values = Array.from({ length: n }, (_, i) => S[i][i]);
    return { values, vectors: V };
}
409
+ // ══════════════════════════════════════════════════════════════════════════════
410
+ // TOOL IMPLEMENTATIONS
411
+ // ══════════════════════════════════════════════════════════════════════════════
412
+ export function registerLabDataTools() {
413
+ // ── 1. REGRESSION ANALYSIS ──
414
+ registerTool({
415
+ name: 'regression_analysis',
416
+ description: 'Perform regression analysis — linear, polynomial, logistic, or exponential. Implements least squares via normal equations. Returns coefficients, R², adjusted R², p-values, and residual analysis.',
417
+ parameters: {
418
+ x_data: { type: 'string', description: 'X values (comma-separated numbers)', required: true },
419
+ y_data: { type: 'string', description: 'Y values (comma-separated numbers)', required: true },
420
+ model_type: { type: 'string', description: 'Model type: linear, polynomial, logistic, or exponential', required: true },
421
+ degree: { type: 'number', description: 'Polynomial degree (default 2, only for polynomial model)' },
422
+ },
423
+ tier: 'free',
424
+ async execute(args) {
425
+ const x = parseCSV(String(args.x_data));
426
+ const y = parseCSV(String(args.y_data));
427
+ if (x.length !== y.length || x.length < 2) {
428
+ return '**Error**: x_data and y_data must have the same length (minimum 2 points).';
429
+ }
430
+ const modelType = String(args.model_type).toLowerCase();
431
+ const degree = Number(args.degree) || 2;
432
+ const n = x.length;
433
+ let coefficients;
434
+ let yPred;
435
+ let paramNames;
436
+ if (modelType === 'linear') {
437
+ // y = b0 + b1*x — normal equations: (X'X)^-1 X'y
438
+ const X = x.map(xi => [1, xi]);
439
+ const Xt = matTranspose(X);
440
+ const XtX = matMul(Xt, X);
441
+ const Xty = matMul(Xt, y.map(yi => [yi]));
442
+ coefficients = matSolve(XtX, Xty.map(r => r[0]));
443
+ yPred = x.map(xi => coefficients[0] + coefficients[1] * xi);
444
+ paramNames = ['intercept', 'slope'];
445
+ }
446
+ else if (modelType === 'polynomial') {
447
+ // y = b0 + b1*x + b2*x^2 + ... + bd*x^d
448
+ const deg = Math.min(degree, n - 1);
449
+ const X = x.map(xi => {
450
+ const row = [];
451
+ for (let d = 0; d <= deg; d++)
452
+ row.push(xi ** d);
453
+ return row;
454
+ });
455
+ const Xt = matTranspose(X);
456
+ const XtX = matMul(Xt, X);
457
+ const Xty = matMul(Xt, y.map(yi => [yi]));
458
+ coefficients = matSolve(XtX, Xty.map(r => r[0]));
459
+ yPred = x.map(xi => {
460
+ let val = 0;
461
+ for (let d = 0; d <= deg; d++)
462
+ val += coefficients[d] * (xi ** d);
463
+ return val;
464
+ });
465
+ paramNames = Array.from({ length: deg + 1 }, (_, i) => i === 0 ? 'intercept' : `x^${i}`);
466
+ }
467
+ else if (modelType === 'exponential') {
468
+ // y = a * e^(b*x) — linearize: ln(y) = ln(a) + b*x
469
+ const yLog = y.map(yi => Math.log(Math.max(yi, 1e-10)));
470
+ const X = x.map(xi => [1, xi]);
471
+ const Xt = matTranspose(X);
472
+ const XtX = matMul(Xt, X);
473
+ const Xty = matMul(Xt, yLog.map(yi => [yi]));
474
+ const linCoeffs = matSolve(XtX, Xty.map(r => r[0]));
475
+ coefficients = [Math.exp(linCoeffs[0]), linCoeffs[1]];
476
+ yPred = x.map(xi => coefficients[0] * Math.exp(coefficients[1] * xi));
477
+ paramNames = ['a (amplitude)', 'b (rate)'];
478
+ }
479
+ else if (modelType === 'logistic') {
480
+ // y = L / (1 + e^(-k*(x - x0))) — simplified: L=1, fit via iterative least squares
481
+ // Initialize with reasonable guesses
482
+ const L = Math.max(...y) * 1.1;
483
+ let k = 1;
484
+ let x0 = median(x);
485
+ // Gradient descent
486
+ const lr = 0.001;
487
+ for (let iter = 0; iter < 2000; iter++) {
488
+ let dL = 0, dk = 0, dx0 = 0;
489
+ for (let i = 0; i < n; i++) {
490
+ const exponent = -k * (x[i] - x0);
491
+ const expVal = Math.exp(Math.max(-500, Math.min(500, exponent)));
492
+ const pred = L / (1 + expVal);
493
+ const err = y[i] - pred;
494
+ const denom = (1 + expVal) ** 2;
495
+ dL += -err / (1 + expVal) * (-1);
496
+ dk += -err * L * (x[i] - x0) * expVal / denom * (-1);
497
+ dx0 += -err * L * (-k) * expVal / denom * (-1);
498
+ }
499
+ k -= lr * dk / n;
500
+ x0 -= lr * dx0 / n;
501
+ }
502
+ coefficients = [L, k, x0];
503
+ yPred = x.map(xi => {
504
+ const exponent = -k * (xi - x0);
505
+ return L / (1 + Math.exp(Math.max(-500, Math.min(500, exponent))));
506
+ });
507
+ paramNames = ['L (supremum)', 'k (steepness)', 'x0 (midpoint)'];
508
+ }
509
+ else {
510
+ return `**Error**: Unknown model_type "${modelType}". Use: linear, polynomial, logistic, exponential.`;
511
+ }
512
+ // R² and adjusted R²
513
+ const yMean = mean(y);
514
+ const ssTot = y.reduce((s, yi) => s + (yi - yMean) ** 2, 0);
515
+ const ssRes = y.reduce((s, yi, i) => s + (yi - yPred[i]) ** 2, 0);
516
+ const rSquared = ssTot > 0 ? 1 - ssRes / ssTot : 0;
517
+ const p = coefficients.length; // number of parameters
518
+ const adjRSquared = 1 - ((1 - rSquared) * (n - 1)) / (n - p - 1);
519
+ // Residuals
520
+ const residuals = y.map((yi, i) => yi - yPred[i]);
521
+ const mse = ssRes / (n - p);
522
+ const rmse = Math.sqrt(mse);
523
+ // Standard errors and t-statistics for linear/polynomial
524
+ let pValues = [];
525
+ if (modelType === 'linear' || modelType === 'polynomial') {
526
+ const deg = modelType === 'linear' ? 1 : Math.min(degree, n - 1);
527
+ const X = x.map(xi => {
528
+ const row = [];
529
+ for (let d = 0; d <= deg; d++)
530
+ row.push(xi ** d);
531
+ return row;
532
+ });
533
+ try {
534
+ const Xt = matTranspose(X);
535
+ const XtXInv = matInverse(matMul(Xt, X));
536
+ const se = coefficients.map((_, j) => Math.sqrt(Math.max(0, mse * XtXInv[j][j])));
537
+ const tStats = coefficients.map((c, j) => se[j] > 0 ? c / se[j] : 0);
538
+ pValues = tStats.map(t => {
539
+ const pVal = 2 * (1 - tCDF(Math.abs(t), n - p));
540
+ return pVal < 0.001 ? '<0.001' : fmt(pVal, 4);
541
+ });
542
+ }
543
+ catch {
544
+ pValues = coefficients.map(() => 'N/A');
545
+ }
546
+ }
547
+ else {
548
+ pValues = coefficients.map(() => 'N/A (nonlinear)');
549
+ }
550
+ // F-statistic
551
+ const fStat = ssTot > 0 && p > 1 ? ((ssTot - ssRes) / (p - 1)) / mse : 0;
552
+ const fPValue = fStat > 0 ? 1 - fCDF(fStat, p - 1, n - p) : 1;
553
+ // Residual analysis
554
+ const residMean = mean(residuals);
555
+ const residStd = stddev(residuals);
556
+ const sortedResid = [...residuals].sort((a, b) => a - b);
557
+ const durbin_watson = residuals.slice(1).reduce((s, r, i) => s + (r - residuals[i]) ** 2, 0) / ssRes;
558
+ // Equation string
559
+ let equation = '';
560
+ if (modelType === 'linear') {
561
+ equation = `y = ${fmt(coefficients[0])} + ${fmt(coefficients[1])} * x`;
562
+ }
563
+ else if (modelType === 'polynomial') {
564
+ equation = coefficients.map((c, i) => i === 0 ? fmt(c) : `${fmt(c)} * x^${i}`).join(' + ');
565
+ equation = `y = ${equation}`;
566
+ }
567
+ else if (modelType === 'exponential') {
568
+ equation = `y = ${fmt(coefficients[0])} * e^(${fmt(coefficients[1])} * x)`;
569
+ }
570
+ else if (modelType === 'logistic') {
571
+ equation = `y = ${fmt(coefficients[0])} / (1 + e^(-${fmt(coefficients[1])} * (x - ${fmt(coefficients[2])})))`;
572
+ }
573
+ let out = `## Regression Analysis — ${modelType.charAt(0).toUpperCase() + modelType.slice(1)}\n\n`;
574
+ out += `**Equation:** \`${equation}\`\n\n`;
575
+ out += `### Coefficients\n\n`;
576
+ out += `| Parameter | Value | p-value |\n|---|---|---|\n`;
577
+ coefficients.forEach((c, i) => {
578
+ out += `| ${paramNames[i]} | ${fmt(c, 6)} | ${pValues[i]} |\n`;
579
+ });
580
+ out += `\n### Model Fit\n\n`;
581
+ out += `| Metric | Value |\n|---|---|\n`;
582
+ out += `| R² | ${fmt(rSquared, 6)} |\n`;
583
+ out += `| Adjusted R² | ${fmt(adjRSquared, 6)} |\n`;
584
+ out += `| RMSE | ${fmt(rmse, 6)} |\n`;
585
+ out += `| F-statistic | ${fmt(fStat, 4)} (p = ${fPValue < 0.001 ? '<0.001' : fmt(fPValue, 4)}) |\n`;
586
+ out += `| Durbin-Watson | ${fmt(durbin_watson, 4)} |\n`;
587
+ out += `| N | ${n} |\n`;
588
+ out += `\n### Residual Summary\n\n`;
589
+ out += `| Statistic | Value |\n|---|---|\n`;
590
+ out += `| Mean | ${fmt(residMean, 6)} |\n`;
591
+ out += `| Std Dev | ${fmt(residStd, 6)} |\n`;
592
+ out += `| Min | ${fmt(sortedResid[0], 6)} |\n`;
593
+ out += `| Median | ${fmt(median(residuals), 6)} |\n`;
594
+ out += `| Max | ${fmt(sortedResid[sortedResid.length - 1], 6)} |\n`;
595
+ return out;
596
+ },
597
+ });
598
+ // ── 2. BAYESIAN INFERENCE ──
599
+ registerTool({
600
+ name: 'bayesian_inference',
601
+ description: 'Conjugate prior Bayesian analysis. Supports Beta-Binomial, Normal-Normal, Gamma-Poisson, and Dirichlet-Multinomial models. Returns posterior parameters, credible intervals, and Bayes factor.',
602
+ parameters: {
603
+ prior_type: { type: 'string', description: 'Prior distribution: beta-binomial, normal-normal, gamma-poisson, dirichlet-multinomial', required: true },
604
+ prior_params: { type: 'string', description: 'Prior parameters as JSON (e.g. {"alpha": 1, "beta": 1} for Beta)', required: true },
605
+ likelihood_type: { type: 'string', description: 'Likelihood type (binomial, normal, poisson, multinomial)', required: true },
606
+ observations: { type: 'string', description: 'Observed data as comma-separated values or JSON', required: true },
607
+ },
608
+ tier: 'free',
609
+ async execute(args) {
610
+ const priorType = String(args.prior_type).toLowerCase().replace(/[_\s]/g, '-');
611
+ let priorParams;
612
+ try {
613
+ priorParams = JSON.parse(String(args.prior_params));
614
+ }
615
+ catch {
616
+ return '**Error**: prior_params must be valid JSON.';
617
+ }
618
+ const obsStr = String(args.observations);
619
+ let out = `## Bayesian Inference — ${priorType}\n\n`;
620
+ if (priorType === 'beta-binomial') {
621
+ const alpha0 = Number(priorParams.alpha) || 1;
622
+ const beta0 = Number(priorParams.beta) || 1;
623
+ const obs = parseCSV(obsStr);
624
+ const successes = obs.filter(v => v === 1 || v > 0).length;
625
+ const failures = obs.length - successes;
626
+ // Posterior: Beta(alpha0 + successes, beta0 + failures)
627
+ const alphaPost = alpha0 + successes;
628
+ const betaPost = beta0 + failures;
629
+ // Posterior mean and variance
630
+ const postMean = alphaPost / (alphaPost + betaPost);
631
+ const postVar = (alphaPost * betaPost) / ((alphaPost + betaPost) ** 2 * (alphaPost + betaPost + 1));
632
+ // 95% credible interval via normal approximation (good for large alpha+beta)
633
+ const postStd = Math.sqrt(postVar);
634
+ const ci95Lower = Math.max(0, postMean - 1.96 * postStd);
635
+ const ci95Upper = Math.min(1, postMean + 1.96 * postStd);
636
+ // Bayes factor for H1: p > 0.5 vs H0: p <= 0.5
637
+ // BF = P(data | H1) / P(data | H0) using prior predictive
638
+ const logBF = logGamma(alphaPost + betaPost) - logGamma(alphaPost) - logGamma(betaPost) -
639
+ (logGamma(alpha0 + beta0) - logGamma(alpha0) - logGamma(beta0));
640
+ out += `### Prior\n`;
641
+ out += `Beta(alpha = ${alpha0}, beta = ${beta0})\n\n`;
642
+ out += `### Data\n`;
643
+ out += `- Observations: ${obs.length}\n`;
644
+ out += `- Successes: ${successes}\n`;
645
+ out += `- Failures: ${failures}\n\n`;
646
+ out += `### Posterior\n`;
647
+ out += `Beta(alpha = ${alphaPost}, beta = ${betaPost})\n\n`;
648
+ out += `| Statistic | Value |\n|---|---|\n`;
649
+ out += `| Posterior Mean | ${fmt(postMean, 6)} |\n`;
650
+ out += `| Posterior Variance | ${fmt(postVar, 6)} |\n`;
651
+ out += `| Posterior Mode | ${fmt(alphaPost > 1 && betaPost > 1 ? (alphaPost - 1) / (alphaPost + betaPost - 2) : postMean, 6)} |\n`;
652
+ out += `| 95% Credible Interval | [${fmt(ci95Lower, 4)}, ${fmt(ci95Upper, 4)}] |\n`;
653
+ out += `| Log Marginal Likelihood | ${fmt(logBF, 4)} |\n`;
654
+ }
655
+ else if (priorType === 'normal-normal') {
656
+ const mu0 = Number(priorParams.mu) || 0;
657
+ const sigma0 = Number(priorParams.sigma) || 1;
658
+ const tau0 = 1 / (sigma0 * sigma0); // prior precision
659
+ const obs = parseCSV(obsStr);
660
+ const n = obs.length;
661
+ const xBar = mean(obs);
662
+ const sigmaLikelihood = Number(priorParams.sigma_likelihood) || stddev(obs) || 1;
663
+ const tauLikelihood = 1 / (sigmaLikelihood * sigmaLikelihood);
664
+ // Posterior: Normal(muPost, 1/tauPost)
665
+ const tauPost = tau0 + n * tauLikelihood;
666
+ const muPost = (tau0 * mu0 + n * tauLikelihood * xBar) / tauPost;
667
+ const sigmaPost = 1 / Math.sqrt(tauPost);
668
+ const ci95Lower = muPost - 1.96 * sigmaPost;
669
+ const ci95Upper = muPost + 1.96 * sigmaPost;
670
+ out += `### Prior\n`;
671
+ out += `Normal(mu = ${mu0}, sigma = ${sigma0})\n\n`;
672
+ out += `### Data\n`;
673
+ out += `- N: ${n}\n`;
674
+ out += `- Sample Mean: ${fmt(xBar, 6)}\n`;
675
+ out += `- Known/Estimated Sigma: ${fmt(sigmaLikelihood, 6)}\n\n`;
676
+ out += `### Posterior\n`;
677
+ out += `Normal(mu = ${fmt(muPost, 6)}, sigma = ${fmt(sigmaPost, 6)})\n\n`;
678
+ out += `| Statistic | Value |\n|---|---|\n`;
679
+ out += `| Posterior Mean | ${fmt(muPost, 6)} |\n`;
680
+ out += `| Posterior Std Dev | ${fmt(sigmaPost, 6)} |\n`;
681
+ out += `| Posterior Precision | ${fmt(tauPost, 6)} |\n`;
682
+ out += `| 95% Credible Interval | [${fmt(ci95Lower, 4)}, ${fmt(ci95Upper, 4)}] |\n`;
683
+ out += `| Prior Weight | ${fmt(tau0 / tauPost * 100, 1)}% |\n`;
684
+ out += `| Data Weight | ${fmt(n * tauLikelihood / tauPost * 100, 1)}% |\n`;
685
+ }
686
+ else if (priorType === 'gamma-poisson') {
687
+ const alpha0 = Number(priorParams.alpha) || 1;
688
+ const beta0 = Number(priorParams.beta) || 1;
689
+ const obs = parseCSV(obsStr);
690
+ const n = obs.length;
691
+ const total = sum(obs);
692
+ // Posterior: Gamma(alpha0 + sum, beta0 + n)
693
+ const alphaPost = alpha0 + total;
694
+ const betaPost = beta0 + n;
695
+ const postMean = alphaPost / betaPost;
696
+ const postVar = alphaPost / (betaPost * betaPost);
697
+ // Credible interval via normal approximation
698
+ const postStd = Math.sqrt(postVar);
699
+ const ci95Lower = Math.max(0, postMean - 1.96 * postStd);
700
+ const ci95Upper = postMean + 1.96 * postStd;
701
+ out += `### Prior\n`;
702
+ out += `Gamma(alpha = ${alpha0}, beta = ${beta0})\n\n`;
703
+ out += `### Data\n`;
704
+ out += `- N: ${n}\n`;
705
+ out += `- Sum: ${total}\n`;
706
+ out += `- Sample Mean: ${fmt(total / n, 4)}\n\n`;
707
+ out += `### Posterior\n`;
708
+ out += `Gamma(alpha = ${fmt(alphaPost, 2)}, beta = ${fmt(betaPost, 2)})\n\n`;
709
+ out += `| Statistic | Value |\n|---|---|\n`;
710
+ out += `| Posterior Mean (rate) | ${fmt(postMean, 6)} |\n`;
711
+ out += `| Posterior Variance | ${fmt(postVar, 6)} |\n`;
712
+ out += `| Posterior Mode | ${fmt(alphaPost > 1 ? (alphaPost - 1) / betaPost : 0, 6)} |\n`;
713
+ out += `| 95% Credible Interval | [${fmt(ci95Lower, 4)}, ${fmt(ci95Upper, 4)}] |\n`;
714
+ }
715
+ else if (priorType === 'dirichlet-multinomial') {
716
+ let alphas;
717
+ if (Array.isArray(priorParams.alpha)) {
718
+ alphas = priorParams.alpha.map(Number);
719
+ }
720
+ else {
721
+ alphas = parseCSV(String(priorParams.alpha || '1,1,1'));
722
+ }
723
+ // Observations: counts for each category
724
+ const obs = parseCSV(obsStr);
725
+ if (obs.length !== alphas.length) {
726
+ return `**Error**: Number of observation categories (${obs.length}) must match number of prior alphas (${alphas.length}).`;
727
+ }
728
+ const alphasPost = alphas.map((a, i) => a + obs[i]);
729
+ const alphaSum = sum(alphasPost);
730
+ const postMeans = alphasPost.map(a => a / alphaSum);
731
+ out += `### Prior\n`;
732
+ out += `Dirichlet(${alphas.map(a => fmt(a, 1)).join(', ')})\n\n`;
733
+ out += `### Data (counts)\n`;
734
+ out += obs.map((o, i) => `- Category ${i + 1}: ${o}`).join('\n') + '\n\n';
735
+ out += `### Posterior\n`;
736
+ out += `Dirichlet(${alphasPost.map(a => fmt(a, 1)).join(', ')})\n\n`;
737
+ out += `| Category | Prior alpha | Posterior alpha | Posterior Mean |\n|---|---|---|---|\n`;
738
+ alphas.forEach((a, i) => {
739
+ out += `| ${i + 1} | ${fmt(a, 1)} | ${fmt(alphasPost[i], 1)} | ${fmt(postMeans[i], 6)} |\n`;
740
+ });
741
+ out += `\n| Total observations: ${sum(obs)} | Alpha sum: ${fmt(alphaSum, 1)} |\n`;
742
+ }
743
+ else {
744
+ return `**Error**: Unknown prior_type "${priorType}". Use: beta-binomial, normal-normal, gamma-poisson, dirichlet-multinomial.`;
745
+ }
746
+ return out;
747
+ },
748
+ });
749
// ── 3. TIME SERIES ANALYSIS ──
// Registers the `time_series_analyze` tool: additive decomposition,
// Holt-Winters (additive) exponential smoothing, or a rough ARIMA(1,1,1)
// fit over a comma-separated numeric series. Output is a Markdown report.
// NOTE: parseCSV/fmt/mean/stddev/registerTool are file-local helpers
// defined elsewhere in this module.
registerTool({
    name: 'time_series_analyze',
    description: 'Time series decomposition, moving averages, exponential smoothing (Holt-Winters), and simple ARIMA estimation. Returns trend, seasonal, and residual components plus forecasts.',
    parameters: {
        data: { type: 'string', description: 'Time series values (comma-separated numbers)', required: true },
        frequency: { type: 'number', description: 'Seasonal frequency (e.g. 12 for monthly, 4 for quarterly)', required: true },
        forecast_periods: { type: 'number', description: 'Number of periods to forecast (default 5)' },
        method: { type: 'string', description: 'Method: decomposition, arima, or exponential_smoothing', required: true },
    },
    tier: 'free',
    // Returns a Markdown string; error cases are returned as '**Error**: …'
    // strings rather than thrown.
    async execute(args) {
        const data = parseCSV(String(args.data));
        // `|| 4` / `|| 5`: a 0 or NaN frequency/forecast count silently falls
        // back to the default even though frequency is marked required.
        const freq = Number(args.frequency) || 4;
        const forecastN = Number(args.forecast_periods) || 5;
        // Normalize "exponential smoothing" / "exponential-smoothing"-style
        // input to the underscore form used in the comparisons below.
        const method = String(args.method).toLowerCase().replace(/[_\s]/g, '_');
        if (data.length < 4)
            return '**Error**: Need at least 4 data points.';
        let out = `## Time Series Analysis\n\n`;
        out += `- **Method:** ${method}\n`;
        out += `- **N:** ${data.length}\n`;
        out += `- **Frequency:** ${freq}\n`;
        out += `- **Forecast Periods:** ${forecastN}\n\n`;
        if (method === 'decomposition') {
            // Additive decomposition: Y = Trend + Seasonal + Residual
            // 1. Trend via centered moving average
            // Positions without a full window stay null and are excluded from
            // the seasonal averaging below.
            const trend = new Array(data.length).fill(null);
            const halfWin = Math.floor(freq / 2);
            for (let i = halfWin; i < data.length - halfWin; i++) {
                let s = 0;
                if (freq % 2 === 0) {
                    // Even frequency: average first and last elements with half weight
                    // (2x(freq/2) window → weights 0.5,1,…,1,0.5 summing to freq).
                    for (let j = i - halfWin; j <= i + halfWin; j++) {
                        const weight = (j === i - halfWin || j === i + halfWin) ? 0.5 : 1;
                        s += data[j] * weight;
                    }
                    trend[i] = s / freq;
                }
                else {
                    // Odd frequency: plain centered average of freq points.
                    for (let j = i - halfWin; j <= i + halfWin; j++)
                        s += data[j];
                    trend[i] = s / freq;
                }
            }
            // 2. Detrended series
            const detrended = data.map((v, i) => trend[i] !== null ? v - trend[i] : null);
            // 3. Seasonal component: average detrended values by position in cycle
            const seasonal = new Array(freq).fill(0);
            const seasonalCounts = new Array(freq).fill(0);
            detrended.forEach((v, i) => {
                if (v !== null) {
                    seasonal[i % freq] += v;
                    seasonalCounts[i % freq]++;
                }
            });
            for (let i = 0; i < freq; i++) {
                seasonal[i] = seasonalCounts[i] > 0 ? seasonal[i] / seasonalCounts[i] : 0;
            }
            // Center the seasonal component
            // (so seasonal effects sum to ~0 and don't shift the level).
            const seasonalMean = mean(seasonal);
            for (let i = 0; i < freq; i++)
                seasonal[i] -= seasonalMean;
            // 4. Residual
            // Where trend is null (series edges) the overall mean stands in
            // for the trend, so edge residuals are only approximate.
            const residual = data.map((v, i) => {
                const t = trend[i] !== null ? trend[i] : mean(data);
                return v - t - seasonal[i % freq];
            });
            // Forecast: extend trend linearly + seasonal
            // Slope is the average step between first and last observed trend
            // values (endpoint secant), not a least-squares fit.
            const trendValues = trend.filter(t => t !== null);
            const trendSlope = trendValues.length >= 2
                ? (trendValues[trendValues.length - 1] - trendValues[0]) / (trendValues.length - 1)
                : 0;
            const lastTrend = trendValues[trendValues.length - 1] || mean(data);
            const forecasts = [];
            for (let i = 1; i <= forecastN; i++) {
                // (data.length + i - 1) % freq: seasonal slot of forecast step i.
                forecasts.push(lastTrend + trendSlope * i + seasonal[(data.length + i - 1) % freq]);
            }
            out += `### Seasonal Indices\n\n`;
            out += `| Period | Index |\n|---|---|\n`;
            seasonal.forEach((s, i) => { out += `| ${i + 1} | ${fmt(s, 4)} |\n`; });
            out += `\n### Decomposition Summary\n\n`;
            out += `| Component | Mean | Std Dev |\n|---|---|---|\n`;
            out += `| Trend | ${fmt(mean(trendValues), 4)} | ${fmt(stddev(trendValues), 4)} |\n`;
            out += `| Seasonal | ${fmt(mean(seasonal), 4)} | ${fmt(stddev(seasonal), 4)} |\n`;
            out += `| Residual | ${fmt(mean(residual), 4)} | ${fmt(stddev(residual), 4)} |\n`;
            out += `\n### Forecasts\n\n`;
            out += `| Period | Value |\n|---|---|\n`;
            forecasts.forEach((f, i) => { out += `| t+${i + 1} | ${fmt(f, 4)} |\n`; });
        }
        else if (method === 'exponential_smoothing') {
            // Holt-Winters additive method
            // Initialize
            // Smoothing constants are fixed, not optimized or user-settable.
            const alpha = 0.3; // level smoothing
            const beta = 0.1; // trend smoothing
            const gammaParam = 0.1; // seasonal smoothing
            // Initialize level and trend from first cycle
            let level = mean(data.slice(0, Math.min(freq, data.length)));
            let trendVal = data.length > freq
                ? (mean(data.slice(freq, Math.min(2 * freq, data.length))) - mean(data.slice(0, freq))) / freq
                : 0;
            // Initialize seasonal indices
            const seasonals = new Array(freq).fill(0);
            if (data.length >= freq) {
                for (let i = 0; i < freq; i++)
                    seasonals[i] = data[i] - level;
            }
            // Fit
            // NOTE(review): `fitted` is accumulated but never read afterwards;
            // only `residuals` feed the MSE/MAE metrics below.
            const fitted = [];
            const residuals = [];
            for (let i = 0; i < data.length; i++) {
                const si = i % freq;
                // One-step-ahead prediction before updating state.
                const predicted = level + trendVal + seasonals[si];
                fitted.push(predicted);
                residuals.push(data[i] - predicted);
                // Standard additive Holt-Winters update equations; the order
                // matters — seasonal update uses newLevel, then state rolls over.
                const newLevel = alpha * (data[i] - seasonals[si]) + (1 - alpha) * (level + trendVal);
                const newTrend = beta * (newLevel - level) + (1 - beta) * trendVal;
                seasonals[si] = gammaParam * (data[i] - newLevel) + (1 - gammaParam) * seasonals[si];
                level = newLevel;
                trendVal = newTrend;
            }
            // Forecast
            const forecasts = [];
            for (let i = 1; i <= forecastN; i++) {
                forecasts.push(level + trendVal * i + seasonals[(data.length + i - 1) % freq]);
            }
            // In-sample one-step-ahead error metrics.
            const mse = residuals.reduce((s, r) => s + r * r, 0) / residuals.length;
            const mae = residuals.reduce((s, r) => s + Math.abs(r), 0) / residuals.length;
            out += `### Holt-Winters Parameters\n\n`;
            out += `| Parameter | Value |\n|---|---|\n`;
            out += `| Alpha (level) | ${alpha} |\n`;
            out += `| Beta (trend) | ${beta} |\n`;
            out += `| Gamma (seasonal) | ${gammaParam} |\n`;
            out += `| Final Level | ${fmt(level, 4)} |\n`;
            out += `| Final Trend | ${fmt(trendVal, 4)} |\n`;
            out += `\n### Fit Quality\n\n`;
            out += `| Metric | Value |\n|---|---|\n`;
            out += `| MSE | ${fmt(mse, 4)} |\n`;
            out += `| RMSE | ${fmt(Math.sqrt(mse), 4)} |\n`;
            out += `| MAE | ${fmt(mae, 4)} |\n`;
            out += `\n### Seasonal Indices\n\n`;
            out += `| Period | Index |\n|---|---|\n`;
            seasonals.forEach((s, i) => { out += `| ${i + 1} | ${fmt(s, 4)} |\n`; });
            out += `\n### Forecasts\n\n`;
            out += `| Period | Value |\n|---|---|\n`;
            forecasts.forEach((f, i) => { out += `| t+${i + 1} | ${fmt(f, 4)} |\n`; });
        }
        else if (method === 'arima') {
            // Simple ARIMA(1,1,1) estimation
            // This is a quick moment-style approximation, not maximum
            // likelihood — parameter estimates are rough.
            // Step 1: First difference
            const diff = [];
            for (let i = 1; i < data.length; i++)
                diff.push(data[i] - data[i - 1]);
            // Step 2: Estimate AR(1) coefficient via OLS on differenced series
            let sumXY = 0, sumXX = 0;
            for (let i = 1; i < diff.length; i++) {
                sumXY += diff[i - 1] * diff[i];
                sumXX += diff[i - 1] * diff[i - 1];
            }
            const phi = sumXX > 0 ? sumXY / sumXX : 0;
            // Step 3: Compute AR residuals
            // First residual is seeded with diff[0] (no lag available).
            const arResid = [diff[0]];
            for (let i = 1; i < diff.length; i++) {
                arResid.push(diff[i] - phi * diff[i - 1]);
            }
            // Step 4: Estimate MA(1) coefficient via autocorrelation of residuals
            const arResidMean = mean(arResid);
            let rho1Num = 0, rho1Den = 0;
            for (let i = 0; i < arResid.length; i++) {
                rho1Den += (arResid[i] - arResidMean) ** 2;
                if (i > 0)
                    rho1Num += (arResid[i] - arResidMean) * (arResid[i - 1] - arResidMean);
            }
            // theta = -lag-1 autocorrelation of the AR residuals; NOTE(review):
            // a one-step approximation, not the usual invertible-root solve.
            const theta = rho1Den > 0 ? -(rho1Num / rho1Den) : 0;
            // Step 5: Compute final residuals
            // Recursive innovation filter: e_t = arResid_t + theta * e_{t-1}.
            const finalResid = [arResid[0]];
            for (let i = 1; i < arResid.length; i++) {
                finalResid.push(arResid[i] + theta * finalResid[i - 1]);
            }
            const sigmaResid = stddev(finalResid);
            // Step 6: Forecast (on differenced scale, then integrate)
            const lastDiff = diff[diff.length - 1];
            const lastResid = finalResid[finalResid.length - 1];
            const forecasts = [];
            let prevDiff = lastDiff;
            let prevResid = lastResid;
            let lastVal = data[data.length - 1];
            for (let i = 0; i < forecastN; i++) {
                const nextDiff = phi * prevDiff + theta * prevResid;
                lastVal += nextDiff; // integrate back to the original scale
                forecasts.push(lastVal);
                prevResid = 0; // future residuals assumed 0
                prevDiff = nextDiff;
            }
            // AIC = n * ln(RSS/n) + 2k
            const rss = finalResid.reduce((s, r) => s + r * r, 0);
            const nResid = finalResid.length;
            const aic = nResid * Math.log(rss / nResid) + 2 * 3; // 3 params: phi, theta, sigma
            out += `### ARIMA(1,1,1) Parameter Estimates\n\n`;
            out += `| Parameter | Value |\n|---|---|\n`;
            out += `| AR(1) phi | ${fmt(phi, 6)} |\n`;
            out += `| MA(1) theta | ${fmt(theta, 6)} |\n`;
            out += `| Residual Sigma | ${fmt(sigmaResid, 6)} |\n`;
            out += `| AIC | ${fmt(aic, 4)} |\n`;
            out += `\n### Differenced Series Summary\n\n`;
            out += `| Statistic | Value |\n|---|---|\n`;
            out += `| Mean | ${fmt(mean(diff), 4)} |\n`;
            out += `| Std Dev | ${fmt(stddev(diff), 4)} |\n`;
            out += `| Autocorrelation(1) | ${fmt(rho1Den > 0 ? rho1Num / rho1Den : 0, 4)} |\n`;
            out += `\n### Forecasts\n\n`;
            out += `| Period | Value | 95% CI |\n|---|---|---|\n`;
            forecasts.forEach((f, i) => {
                // CI width grows with sqrt(horizon) — random-walk style band.
                const ciWidth = 1.96 * sigmaResid * Math.sqrt(i + 1);
                out += `| t+${i + 1} | ${fmt(f, 4)} | [${fmt(f - ciWidth, 4)}, ${fmt(f + ciWidth, 4)}] |\n`;
            });
        }
        else {
            return `**Error**: Unknown method "${method}". Use: decomposition, arima, exponential_smoothing.`;
        }
        return out;
    },
});
970
// ── 4. DIMENSIONALITY REDUCTION (PCA) ──
// Registers the `dimensionality_reduce` tool: classic covariance-matrix PCA.
// Input is a JSON 2D array (rows = samples, columns = features); output is a
// Markdown report with eigenvalues, loadings, and projected scores.
// NOTE: matCreate/symmetricEigen/mean/sum/fmt/registerTool are file-local
// helpers defined elsewhere in this module.
registerTool({
    name: 'dimensionality_reduce',
    description: 'Principal Component Analysis (PCA) via eigendecomposition of the covariance matrix. Returns principal components, explained variance ratios, and loadings.',
    parameters: {
        data: { type: 'string', description: 'Data as JSON array of arrays (each inner array is a sample)', required: true },
        method: { type: 'string', description: 'Method: pca (currently the only supported method)', required: true },
        n_components: { type: 'number', description: 'Number of principal components to return (default 2)' },
    },
    tier: 'free',
    // Returns a Markdown string; error cases are returned as '**Error**: …'
    // strings rather than thrown.
    async execute(args) {
        let dataMatrix;
        try {
            dataMatrix = JSON.parse(String(args.data));
        }
        catch {
            return '**Error**: data must be a valid JSON array of arrays.';
        }
        // Only the first row is checked for array-ness; ragged/mismatched
        // inner rows are not validated here.
        if (!Array.isArray(dataMatrix) || dataMatrix.length < 2 || !Array.isArray(dataMatrix[0])) {
            return '**Error**: data must be a 2D array with at least 2 rows.';
        }
        const methodName = String(args.method).toLowerCase();
        if (methodName !== 'pca') {
            return `**Error**: Unknown method "${methodName}". Currently only "pca" is supported.`;
        }
        const n = dataMatrix.length;  // samples
        const p = dataMatrix[0].length; // features
        // `|| 2`: n_components of 0/NaN falls back to 2; capped at p.
        const nComp = Math.min(Number(args.n_components) || 2, p);
        // 1. Center the data (subtract column means)
        const colMeans = Array.from({ length: p }, (_, j) => mean(dataMatrix.map(row => row[j])));
        const centered = dataMatrix.map(row => row.map((v, j) => v - colMeans[j]));
        // 2. Compute covariance matrix (1/(n-1) * X'X)
        // Only the upper triangle is computed; mirrored by symmetry.
        const covMatrix = matCreate(p, p);
        for (let i = 0; i < p; i++) {
            for (let j = i; j < p; j++) {
                let s = 0;
                for (let k = 0; k < n; k++)
                    s += centered[k][i] * centered[k][j];
                covMatrix[i][j] = s / (n - 1);
                covMatrix[j][i] = covMatrix[i][j];
            }
        }
        // 3. Eigendecomposition
        // NOTE(review): assumes symmetricEigen returns { values, vectors }
        // with eigenvectors stored as COLUMNS of `vectors` — confirm against
        // the helper's definition elsewhere in this file.
        const eigen = symmetricEigen(covMatrix);
        // 4. Sort eigenvalues descending
        const indices = eigen.values.map((v, i) => ({ val: v, idx: i }))
            .sort((a, b) => b.val - a.val);
        // Negative eigenvalues (numerical noise) are excluded from the total
        // and clamped to 0 in the ratios; `|| 1` guards division by zero.
        const totalVariance = sum(eigen.values.filter(v => v > 0));
        const explainedRatios = indices.map(({ val }) => Math.max(0, val) / (totalVariance || 1));
        const cumulativeRatios = [];
        let cumSum = 0;
        for (const r of explainedRatios) {
            cumSum += r;
            cumulativeRatios.push(cumSum);
        }
        // 5. Loadings: eigenvectors (columns of V) corresponding to top eigenvalues
        // (Eigenvector sign is arbitrary, so loading/score signs may flip
        // between runs or implementations.)
        const loadings = [];
        for (let c = 0; c < nComp; c++) {
            const colIdx = indices[c].idx;
            loadings.push(Array.from({ length: p }, (_, row) => eigen.vectors[row][colIdx]));
        }
        // 6. Project data onto principal components
        // score[c] = centered row · loading vector c.
        const projected = centered.map(row => {
            const scores = [];
            for (let c = 0; c < nComp; c++) {
                let score = 0;
                for (let j = 0; j < p; j++)
                    score += row[j] * loadings[c][j];
                scores.push(score);
            }
            return scores;
        });
        let out = `## PCA — Principal Component Analysis\n\n`;
        out += `- **Samples:** ${n}\n`;
        out += `- **Features:** ${p}\n`;
        out += `- **Components:** ${nComp}\n\n`;
        out += `### Explained Variance\n\n`;
        out += `| PC | Eigenvalue | Explained % | Cumulative % |\n|---|---|---|---|\n`;
        // Show up to two extra PCs beyond those selected, for context;
        // selected components are marked with ' *'.
        for (let i = 0; i < Math.min(p, nComp + 2); i++) {
            const marker = i < nComp ? ' *' : '';
            out += `| PC${i + 1}${marker} | ${fmt(Math.max(0, indices[i].val), 6)} | ${fmt(explainedRatios[i] * 100, 2)}% | ${fmt(cumulativeRatios[i] * 100, 2)}% |\n`;
        }
        out += `\n### Loadings (top ${nComp} components)\n\n`;
        out += `| Feature |`;
        for (let c = 0; c < nComp; c++)
            out += ` PC${c + 1} |`;
        out += `\n|---|`;
        for (let c = 0; c < nComp; c++)
            out += `---|`;
        out += `\n`;
        for (let j = 0; j < p; j++) {
            out += `| Feature ${j + 1} |`;
            for (let c = 0; c < nComp; c++)
                out += ` ${fmt(loadings[c][j], 4)} |`;
            out += `\n`;
        }
        // Scores table is truncated to the first 10 samples.
        out += `\n### Projected Scores (first ${Math.min(10, n)} samples)\n\n`;
        out += `| Sample |`;
        for (let c = 0; c < nComp; c++)
            out += ` PC${c + 1} |`;
        out += `\n|---|`;
        for (let c = 0; c < nComp; c++)
            out += `---|`;
        out += `\n`;
        for (let i = 0; i < Math.min(10, n); i++) {
            out += `| ${i + 1} |`;
            for (let c = 0; c < nComp; c++)
                out += ` ${fmt(projected[i][c], 4)} |`;
            out += `\n`;
        }
        if (n > 10)
            out += `\n*...${n - 10} more samples omitted.*\n`;
        return out;
    },
});
1085
// ── 5. DISTRIBUTION FIT ──
// Registers the `distribution_fit` tool: fits each requested candidate
// distribution to the data (MLE or method-of-moments), scores each fit with
// log-likelihood / AIC / BIC and a Kolmogorov-Smirnov statistic, and reports
// a Markdown comparison table sorted by AIC.
// NOTE: parseCSV/fmt/mean/stddev/variance/sum/normalCDF/logGamma/
// lowerIncompleteGammaP/registerTool are file-local helpers defined
// elsewhere in this module.
registerTool({
    name: 'distribution_fit',
    description: 'Fit data to distributions (normal, poisson, exponential, gamma, weibull, lognormal) using MLE. Kolmogorov-Smirnov goodness-of-fit test. AIC/BIC comparison.',
    parameters: {
        data: { type: 'string', description: 'Data values (comma-separated numbers)', required: true },
        candidate_distributions: { type: 'string', description: 'Distributions to test (comma-separated): normal, poisson, exponential, gamma, weibull, lognormal', required: true },
    },
    tier: 'free',
    // Returns a Markdown string; error cases are returned as '**Error**: …'
    // strings rather than thrown. Unknown candidate names are silently skipped.
    async execute(args) {
        const data = parseCSV(String(args.data));
        if (data.length < 3)
            return '**Error**: Need at least 3 data points.';
        const candidates = String(args.candidate_distributions).split(',').map(s => s.trim().toLowerCase());
        const n = data.length;
        const sorted = [...data].sort((a, b) => a - b);
        // Empirical CDF values for KS test
        // Right-continuous step function: F_n(x_(i)) = (i+1)/n.
        const empiricalCDF = sorted.map((_, i) => (i + 1) / n);
        const results = [];
        // For each candidate: estimate params, then build a CDF closure (for
        // KS) and a per-point log-density closure (for the likelihood).
        for (const dist of candidates) {
            let params = {};
            let cdfFn;
            let logLikFn;
            let nParams = 0;
            if (dist === 'normal') {
                // MLE: sample mean and (helper-defined) standard deviation.
                const mu = mean(data);
                const sigma = stddev(data);
                params = { mu, sigma };
                nParams = 2;
                cdfFn = (x) => normalCDF((x - mu) / sigma);
                logLikFn = (x) => -0.5 * Math.log(2 * Math.PI) - Math.log(sigma) - 0.5 * ((x - mu) / sigma) ** 2;
            }
            else if (dist === 'poisson') {
                // MLE: lambda = sample mean. NOTE(review): a non-positive mean
                // makes Math.log(lambda) NaN/-Infinity — data is assumed to be
                // non-negative counts.
                const lambda = mean(data);
                params = { lambda };
                nParams = 1;
                // Poisson CDF via summation
                // (in log-space per term to avoid factorial overflow).
                cdfFn = (x) => {
                    const k = Math.floor(x);
                    if (k < 0)
                        return 0;
                    let p = 0;
                    for (let i = 0; i <= k; i++) {
                        p += Math.exp(-lambda + i * Math.log(lambda) - logGamma(i + 1));
                    }
                    return Math.min(1, p);
                };
                logLikFn = (x) => {
                    // Values are rounded to the nearest integer count.
                    const k = Math.round(x);
                    return -lambda + k * Math.log(lambda) - logGamma(k + 1);
                };
            }
            else if (dist === 'exponential') {
                // MLE on the positive subset only; log-likelihood of any
                // non-positive observation is -Infinity.
                const lambda = 1 / mean(data.filter(v => v > 0));
                params = { lambda };
                nParams = 1;
                cdfFn = (x) => x >= 0 ? 1 - Math.exp(-lambda * x) : 0;
                logLikFn = (x) => x >= 0 ? Math.log(lambda) - lambda * x : -Infinity;
            }
            else if (dist === 'gamma') {
                // MLE via method of moments
                // (shape = m^2/v, rate = m/v on the positive subset; despite
                // the comment this is moments, not true MLE).
                const m = mean(data.filter(v => v > 0));
                const v = variance(data.filter(v => v > 0));
                const shape = v > 0 ? (m * m) / v : 1;
                const rate = v > 0 ? m / v : 1;
                params = { shape, rate };
                nParams = 2;
                cdfFn = (x) => x > 0 ? lowerIncompleteGammaP(shape, rate * x) : 0;
                logLikFn = (x) => {
                    if (x <= 0)
                        return -Infinity;
                    return (shape - 1) * Math.log(x) + shape * Math.log(rate) - rate * x - logGamma(shape);
                };
            }
            else if (dist === 'weibull') {
                // MLE via Newton-Raphson for shape, then scale
                const positiveData = data.filter(v => v > 0);
                if (positiveData.length < 2)
                    continue; // not enough positive data — skip this candidate
                const logData = positiveData.map(v => Math.log(v));
                const meanLog = mean(logData);
                // Newton-Raphson for shape parameter k
                // Solves the standard Weibull profile-likelihood score
                // equation; derivative is approximated by forward difference.
                let k = 1.0;
                for (let iter = 0; iter < 50; iter++) {
                    const xk = positiveData.map(x => x ** k);
                    const xkLogX = positiveData.map((x, i) => (x ** k) * logData[i]);
                    const sumXk = sum(xk);
                    const sumXkLogX = sum(xkLogX);
                    const sumLogX = sum(logData);
                    const f = sumXkLogX / sumXk - 1 / k - sumLogX / positiveData.length;
                    // Approximate derivative
                    const h = 0.001;
                    const xkh = positiveData.map(x => x ** (k + h));
                    const xkhLogX = positiveData.map((x, i) => (x ** (k + h)) * logData[i]);
                    const fh = sum(xkhLogX) / sum(xkh) - 1 / (k + h) - sumLogX / positiveData.length;
                    const df = (fh - f) / h;
                    if (Math.abs(df) < 1e-20)
                        break; // flat derivative — bail out to avoid divide-by-~0
                    const step = f / df;
                    k -= step;
                    if (k <= 0.01)
                        k = 0.01; // keep shape strictly positive
                    if (Math.abs(step) < 1e-10)
                        break; // converged
                }
                // Closed-form scale given the fitted shape k.
                const lambda_w = (sum(data.filter(v => v > 0).map(x => x ** k)) / positiveData.length) ** (1 / k);
                params = { shape: k, scale: lambda_w };
                nParams = 2;
                cdfFn = (x) => x > 0 ? 1 - Math.exp(-((x / lambda_w) ** k)) : 0;
                logLikFn = (x) => {
                    if (x <= 0)
                        return -Infinity;
                    return Math.log(k) - k * Math.log(lambda_w) + (k - 1) * Math.log(x) - (x / lambda_w) ** k;
                };
            }
            else if (dist === 'lognormal') {
                // MLE: mean/sd of the logs of the positive subset.
                const logData = data.filter(v => v > 0).map(v => Math.log(v));
                if (logData.length < 2)
                    continue; // not enough positive data — skip this candidate
                const mu = mean(logData);
                const sigma = stddev(logData);
                params = { mu, sigma };
                nParams = 2;
                cdfFn = (x) => x > 0 ? normalCDF((Math.log(x) - mu) / sigma) : 0;
                logLikFn = (x) => {
                    if (x <= 0)
                        return -Infinity;
                    return -Math.log(x) - 0.5 * Math.log(2 * Math.PI) - Math.log(sigma) - 0.5 * ((Math.log(x) - mu) / sigma) ** 2;
                };
            }
            else {
                continue; // unrecognized distribution name — skip silently
            }
            // Log-likelihood
            // Summed over ALL data points, so any non-positive observation
            // drives positive-support fits to -Infinity.
            const logLik = data.reduce((s, x) => s + logLikFn(x), 0);
            // AIC and BIC
            const aic = -2 * logLik + 2 * nParams;
            const bic = -2 * logLik + nParams * Math.log(n);
            // KS test: max |F_empirical(x) - F_theoretical(x)|
            // Both sides of each empirical step are checked (diff1/diff2).
            let ksMax = 0;
            for (let i = 0; i < sorted.length; i++) {
                const theorCDF = cdfFn(sorted[i]);
                const diff1 = Math.abs(empiricalCDF[i] - theorCDF);
                const diff2 = Math.abs((i > 0 ? empiricalCDF[i - 1] : 0) - theorCDF);
                ksMax = Math.max(ksMax, diff1, diff2);
            }
            // KS p-value approximation (Kolmogorov distribution, large sample)
            // First-term asymptotic with Stephens' small-sample correction;
            // can exceed 1, hence the clamp below. NOTE(review): the KS test
            // with estimated parameters is anti-conservative (Lilliefors).
            const sqrtN = Math.sqrt(n);
            const z = (sqrtN + 0.12 + 0.11 / sqrtN) * ksMax;
            const ksPValue = 2 * Math.exp(-2 * z * z);
            results.push({
                name: dist,
                params,
                logLik,
                aic,
                bic,
                ksStatistic: ksMax,
                ksPValue: Math.min(1, Math.max(0, ksPValue)),
                nParams,
            });
        }
        if (results.length === 0)
            return '**Error**: No valid distributions to test.';
        // Sort by AIC
        // (lower is better; first entry is flagged BEST below).
        results.sort((a, b) => a.aic - b.aic);
        let out = `## Distribution Fitting Results\n\n`;
        out += `**N = ${n}** | Data range: [${fmt(sorted[0], 4)}, ${fmt(sorted[sorted.length - 1], 4)}] | Mean: ${fmt(mean(data), 4)} | SD: ${fmt(stddev(data), 4)}\n\n`;
        out += `### Model Comparison (sorted by AIC)\n\n`;
        out += `| Distribution | AIC | BIC | Log-Lik | KS Stat | KS p-value | Fit |\n|---|---|---|---|---|---|---|\n`;
        results.forEach((r, i) => {
            // Qualitative label from the KS p-value alone.
            const fit = r.ksPValue > 0.05 ? 'Good' : r.ksPValue > 0.01 ? 'Marginal' : 'Poor';
            const best = i === 0 ? ' **BEST**' : '';
            out += `| ${r.name}${best} | ${fmt(r.aic, 2)} | ${fmt(r.bic, 2)} | ${fmt(r.logLik, 2)} | ${fmt(r.ksStatistic, 4)} | ${r.ksPValue < 0.001 ? '<0.001' : fmt(r.ksPValue, 4)} | ${fit} |\n`;
        });
        out += `\n### Parameter Estimates\n\n`;
        results.forEach(r => {
            out += `**${r.name}**: `;
            out += Object.entries(r.params).map(([k, v]) => `${k} = ${fmt(v, 6)}`).join(', ');
            out += `\n`;
        });
        return out;
    },
});
1268
// ── 6. CORRELATION MATRIX ──
// Registers the `correlation_matrix` tool: pairwise Pearson / Spearman /
// Kendall correlations over a JSON array of variable arrays, with
// significance stars and a top-5 strongest-pairs table, as Markdown.
// NOTE: matCreate/mean/rank/normalCDF/tCDF/fmt/registerTool are file-local
// helpers defined elsewhere in this module.
registerTool({
    name: 'correlation_matrix',
    description: 'Compute Pearson, Spearman, or Kendall correlation matrix. Output as formatted table with significance markers (*** p<0.001, ** p<0.01, * p<0.05).',
    parameters: {
        data: { type: 'string', description: 'Data as JSON array of arrays (each inner array is a variable\'s values)', required: true },
        method: { type: 'string', description: 'Correlation method: pearson, spearman, or kendall', required: true },
        variable_names: { type: 'string', description: 'Variable names (comma-separated, optional)' },
    },
    tier: 'free',
    // Returns a Markdown string; error cases are returned as '**Error**: …'
    // strings rather than thrown. Any method other than spearman/kendall
    // falls through to pearson.
    async execute(args) {
        let dataMatrix;
        try {
            dataMatrix = JSON.parse(String(args.data));
        }
        catch {
            return '**Error**: data must be a valid JSON array of arrays.';
        }
        if (!Array.isArray(dataMatrix) || dataMatrix.length < 2) {
            return '**Error**: Need at least 2 variables (arrays).';
        }
        const methodName = String(args.method).toLowerCase();
        const p = dataMatrix.length;      // number of variables
        // n taken from the first variable; all inner arrays are assumed to
        // have equal length (not validated here).
        const n = dataMatrix[0].length;
        const names = args.variable_names
            ? String(args.variable_names).split(',').map(s => s.trim())
            : dataMatrix.map((_, i) => `V${i + 1}`);
        // Compute correlation function
        // Standard product-moment correlation; returns 0 on zero variance.
        function pearsonCorr(x, y) {
            const mx = mean(x), my = mean(y);
            let num = 0, dx2 = 0, dy2 = 0;
            for (let i = 0; i < x.length; i++) {
                const dx = x[i] - mx, dy = y[i] - my;
                num += dx * dy;
                dx2 += dx * dx;
                dy2 += dy * dy;
            }
            const denom = Math.sqrt(dx2 * dy2);
            return denom > 0 ? num / denom : 0;
        }
        // Spearman = Pearson on the ranks (rank() is a file-local helper).
        function spearmanCorr(x, y) {
            return pearsonCorr(rank(x), rank(y));
        }
        // Kendall via concordant/discordant pair counting, O(n^2). Tied
        // pairs count as neither — NOTE(review): this looks like tau-a, not
        // the tie-corrected tau-b; confirm intended variant.
        function kendallCorr(x, y) {
            let concordant = 0, discordant = 0;
            for (let i = 0; i < x.length; i++) {
                for (let j = i + 1; j < x.length; j++) {
                    const dx = x[i] - x[j];
                    const dy = y[i] - y[j];
                    if (dx * dy > 0)
                        concordant++;
                    else if (dx * dy < 0)
                        discordant++;
                }
            }
            const total = concordant + discordant;
            return total > 0 ? (concordant - discordant) / total : 0;
        }
        const corrFn = methodName === 'spearman' ? spearmanCorr
            : methodName === 'kendall' ? kendallCorr
                : pearsonCorr;
        // Build correlation matrix
        // Symmetric; only the upper triangle is computed and mirrored.
        const corrMatrix = matCreate(p, p);
        const pValueMatrix = matCreate(p, p);
        for (let i = 0; i < p; i++) {
            corrMatrix[i][i] = 1.0;
            pValueMatrix[i][i] = 0;
            for (let j = i + 1; j < p; j++) {
                const r = corrFn(dataMatrix[i], dataMatrix[j]);
                corrMatrix[i][j] = r;
                corrMatrix[j][i] = r;
                // p-value for correlation
                let pVal;
                if (methodName === 'kendall') {
                    // Normal approximation for Kendall's tau
                    // (null-hypothesis variance of tau; two-sided).
                    const se = Math.sqrt((2 * (2 * n + 5)) / (9 * n * (n - 1)));
                    const z = Math.abs(r) / se;
                    pVal = 2 * (1 - normalCDF(z));
                }
                else {
                    // t-test for Pearson/Spearman
                    // t = r * sqrt((n-2)/(1-r^2)) with n-2 df; |r| = 1 gives
                    // p = 0 directly to avoid division by zero.
                    if (Math.abs(r) >= 1) {
                        pVal = 0;
                    }
                    else {
                        const t = r * Math.sqrt((n - 2) / (1 - r * r));
                        pVal = 2 * (1 - tCDF(Math.abs(t), n - 2));
                    }
                }
                pValueMatrix[i][j] = pVal;
                pValueMatrix[j][i] = pVal;
            }
        }
        // Star markers matching the legend printed below the table.
        function sigMarker(pVal) {
            if (pVal < 0.001)
                return '***';
            if (pVal < 0.01)
                return '**';
            if (pVal < 0.05)
                return '*';
            return '';
        }
        let out = `## Correlation Matrix — ${methodName.charAt(0).toUpperCase() + methodName.slice(1)}\n\n`;
        out += `**N = ${n}** observations\n\n`;
        // Header
        out += `| |`;
        names.forEach(name => { out += ` ${name} |`; });
        out += `\n|---|`;
        names.forEach(() => { out += `---|`; });
        out += `\n`;
        // Body
        for (let i = 0; i < p; i++) {
            out += `| **${names[i]}** |`;
            for (let j = 0; j < p; j++) {
                if (i === j) {
                    out += ` 1.0000 |`;
                }
                else {
                    const marker = sigMarker(pValueMatrix[i][j]);
                    out += ` ${fmt(corrMatrix[i][j], 4)}${marker} |`;
                }
            }
            out += `\n`;
        }
        // Asterisks are escaped so Markdown doesn't render them as emphasis.
        out += `\nSignificance: \\*\\*\\* p<0.001, \\*\\* p<0.01, \\* p<0.05\n`;
        // Strongest correlations
        // Rank all unique pairs by |r| and list the top 5.
        const pairs = [];
        for (let i = 0; i < p; i++) {
            for (let j = i + 1; j < p; j++) {
                pairs.push({ i, j, r: corrMatrix[i][j], p: pValueMatrix[i][j] });
            }
        }
        pairs.sort((a, b) => Math.abs(b.r) - Math.abs(a.r));
        if (pairs.length > 0) {
            out += `\n### Strongest Correlations\n\n`;
            out += `| Pair | r | p-value |\n|---|---|---|\n`;
            pairs.slice(0, 5).forEach(pair => {
                out += `| ${names[pair.i]} - ${names[pair.j]} | ${fmt(pair.r, 4)} | ${pair.p < 0.001 ? '<0.001' : fmt(pair.p, 4)} |\n`;
            });
        }
        return out;
    },
});
1411
+ // ── 7. POWER ANALYSIS ──
1412
+ registerTool({
1413
+ name: 'power_analysis',
1414
+ description: 'Calculate statistical power, required sample size, or minimum detectable effect size for t-test, ANOVA, chi-square, or proportion test.',
1415
+ parameters: {
1416
+ test_type: { type: 'string', description: 'Test type: t_test, anova, chi_square, proportion', required: true },
1417
+ effect_size: { type: 'number', description: 'Effect size (Cohen\'s d for t-test, f for ANOVA, w for chi-square, h for proportion)', required: true },
1418
+ alpha: { type: 'number', description: 'Significance level (default 0.05)' },
1419
+ power: { type: 'number', description: 'Desired power (default 0.8)' },
1420
+ solve_for: { type: 'string', description: 'What to solve for: n (sample size), power, or effect (effect size)', required: true },
1421
+ },
1422
+ tier: 'free',
1423
+ async execute(args) {
1424
+ const testType = String(args.test_type).toLowerCase().replace(/[_\s]/g, '_');
1425
+ let effectSize = Number(args.effect_size) || 0.5;
1426
+ let alpha = Number(args.alpha) || 0.05;
1427
+ let power = Number(args.power) || 0.8;
1428
+ const solveFor = String(args.solve_for).toLowerCase();
1429
+ let out = `## Power Analysis\n\n`;
1430
+ out += `- **Test:** ${testType}\n`;
1431
+ // The critical z-value for one-tailed alpha
1432
+ const zAlpha = normalQuantile(1 - alpha / 2);
1433
+ if (testType === 't_test') {
1434
+ // Two-sample t-test power
1435
+ // power = P(reject H0 | H1 true) = Phi(d*sqrt(n/2) - z_alpha/2)
1436
+ if (solveFor === 'n') {
1437
+ const zBeta = normalQuantile(power);
1438
+ const n = Math.ceil(2 * ((zAlpha + zBeta) / effectSize) ** 2);
1439
+ out += `- **Effect Size (d):** ${fmt(effectSize, 4)}\n`;
1440
+ out += `- **Alpha:** ${alpha}\n`;
1441
+ out += `- **Power:** ${power}\n\n`;
1442
+ out += `### Result\n\n`;
1443
+ out += `**Required sample size per group: ${n}**\n`;
1444
+ out += `**Total sample size: ${2 * n}**\n`;
1445
+ }
1446
+ else if (solveFor === 'power') {
1447
+ // Need sample size from effect; use n = 30 as default context
1448
+ // Actually: compute power for a range of n
1449
+ out += `- **Effect Size (d):** ${fmt(effectSize, 4)}\n`;
1450
+ out += `- **Alpha:** ${alpha}\n\n`;
1451
+ out += `### Power for Various Sample Sizes\n\n`;
1452
+ out += `| n (per group) | Total N | Power |\n|---|---|---|\n`;
1453
+ for (const n of [10, 20, 30, 50, 75, 100, 150, 200, 300, 500]) {
1454
+ const noncentrality = effectSize * Math.sqrt(n / 2);
1455
+ const computedPower = 1 - normalCDF(zAlpha - noncentrality);
1456
+ out += `| ${n} | ${2 * n} | ${fmt(computedPower, 4)} |\n`;
1457
+ }
1458
+ }
1459
+ else if (solveFor === 'effect') {
1460
+ // Minimum detectable effect for given n and power
1461
+ out += `- **Alpha:** ${alpha}\n`;
1462
+ out += `- **Power:** ${power}\n\n`;
1463
+ out += `### Minimum Detectable Effect for Various Sample Sizes\n\n`;
1464
+ out += `| n (per group) | Total N | Min Effect (d) | Interpretation |\n|---|---|---|---|\n`;
1465
+ const zBeta = normalQuantile(power);
1466
+ for (const n of [10, 20, 30, 50, 75, 100, 150, 200, 300, 500]) {
1467
+ const d = (zAlpha + zBeta) / Math.sqrt(n / 2);
1468
+ const interp = d >= 0.8 ? 'Large' : d >= 0.5 ? 'Medium' : d >= 0.2 ? 'Small' : 'Tiny';
1469
+ out += `| ${n} | ${2 * n} | ${fmt(d, 4)} | ${interp} |\n`;
1470
+ }
1471
+ }
1472
+ }
1473
+ else if (testType === 'anova') {
1474
+ // One-way ANOVA: f = effect size (Cohen's f)
1475
+ // df1 = k - 1 (assume k=3 groups if not specified)
1476
+ const k = 3;
1477
+ const df1 = k - 1;
1478
+ if (solveFor === 'n') {
1479
+ const zBeta = normalQuantile(power);
1480
+ // Approximation: n per group ~ ((z_alpha + z_beta)^2) / (f^2 * k) + corrections
1481
+ const lambda = effectSize * effectSize; // noncentrality per observation
1482
+ const nPerGroup = Math.ceil(((zAlpha + zBeta) ** 2) / (lambda * k) + df1 / 2);
1483
+ const totalN = nPerGroup * k;
1484
+ out += `- **Effect Size (f):** ${fmt(effectSize, 4)}\n`;
1485
+ out += `- **Groups (k):** ${k}\n`;
1486
+ out += `- **Alpha:** ${alpha}\n`;
1487
+ out += `- **Power:** ${power}\n\n`;
1488
+ out += `### Result\n\n`;
1489
+ out += `**Required sample size per group: ~${nPerGroup}**\n`;
1490
+ out += `**Total sample size: ~${totalN}**\n`;
1491
+ }
1492
+ else if (solveFor === 'power') {
1493
+ out += `- **Effect Size (f):** ${fmt(effectSize, 4)}\n`;
1494
+ out += `- **Groups (k):** ${k}\n`;
1495
+ out += `- **Alpha:** ${alpha}\n\n`;
1496
+ out += `### Power for Various Sample Sizes\n\n`;
1497
+ out += `| n (per group) | Total N | Power |\n|---|---|---|\n`;
1498
+ for (const n of [10, 20, 30, 50, 75, 100, 150]) {
1499
+ const lambda = n * k * effectSize * effectSize;
1500
+ // Power approx via noncentral F → normal approximation
1501
+ const ncp = Math.sqrt(lambda);
1502
+ const critVal = normalQuantile(1 - alpha);
1503
+ const computedPower = 1 - normalCDF(critVal - ncp);
1504
+ out += `| ${n} | ${n * k} | ${fmt(Math.min(computedPower, 0.999), 4)} |\n`;
1505
+ }
1506
+ }
1507
+ else {
1508
+ const zBeta = normalQuantile(power);
1509
+ out += `- **Alpha:** ${alpha}\n`;
1510
+ out += `- **Power:** ${power}\n`;
1511
+ out += `- **Groups (k):** ${k}\n\n`;
1512
+ out += `### Minimum Detectable Effect (f) for Various Sample Sizes\n\n`;
1513
+ out += `| n (per group) | Total N | Min Effect (f) | Interpretation |\n|---|---|---|---|\n`;
1514
+ for (const n of [10, 20, 30, 50, 75, 100, 150, 200]) {
1515
+ const f = (zAlpha + zBeta) / Math.sqrt(n * k);
1516
+ const interp = f >= 0.4 ? 'Large' : f >= 0.25 ? 'Medium' : f >= 0.1 ? 'Small' : 'Tiny';
1517
+ out += `| ${n} | ${n * k} | ${fmt(f, 4)} | ${interp} |\n`;
1518
+ }
1519
+ }
1520
+ }
1521
+ else if (testType === 'chi_square') {
1522
+ // Chi-square test: w = effect size, df = (r-1)(c-1)
1523
+ const df = 1; // default for 2x2
1524
+ if (solveFor === 'n') {
1525
+ const zBeta = normalQuantile(power);
1526
+ const n = Math.ceil(((zAlpha + zBeta) / effectSize) ** 2);
1527
+ out += `- **Effect Size (w):** ${fmt(effectSize, 4)}\n`;
1528
+ out += `- **Alpha:** ${alpha}\n`;
1529
+ out += `- **Power:** ${power}\n\n`;
1530
+ out += `### Result\n\n`;
1531
+ out += `**Required total sample size: ${n}**\n`;
1532
+ }
1533
+ else if (solveFor === 'power') {
1534
+ out += `- **Effect Size (w):** ${fmt(effectSize, 4)}\n`;
1535
+ out += `- **Alpha:** ${alpha}\n\n`;
1536
+ out += `### Power for Various Sample Sizes\n\n`;
1537
+ out += `| N | Power |\n|---|---|\n`;
1538
+ for (const n of [20, 50, 100, 200, 300, 500, 1000]) {
1539
+ const ncp = n * effectSize * effectSize;
1540
+ const critVal = normalQuantile(1 - alpha);
1541
+ const computedPower = 1 - normalCDF(critVal - Math.sqrt(ncp));
1542
+ out += `| ${n} | ${fmt(Math.min(computedPower, 0.999), 4)} |\n`;
1543
+ }
1544
+ }
1545
+ else {
1546
+ const zBeta = normalQuantile(power);
1547
+ out += `- **Alpha:** ${alpha}\n`;
1548
+ out += `- **Power:** ${power}\n\n`;
1549
+ out += `### Minimum Detectable Effect (w) for Various Sample Sizes\n\n`;
1550
+ out += `| N | Min Effect (w) | Interpretation |\n|---|---|---|\n`;
1551
+ for (const n of [20, 50, 100, 200, 300, 500, 1000]) {
1552
+ const w = (zAlpha + zBeta) / Math.sqrt(n);
1553
+ const interp = w >= 0.5 ? 'Large' : w >= 0.3 ? 'Medium' : w >= 0.1 ? 'Small' : 'Tiny';
1554
+ out += `| ${n} | ${fmt(w, 4)} | ${interp} |\n`;
1555
+ }
1556
+ }
1557
+ }
1558
+ else if (testType === 'proportion') {
1559
+ // Two-proportion z-test: h = effect size (Cohen's h)
1560
+ if (solveFor === 'n') {
1561
+ const zBeta = normalQuantile(power);
1562
+ const n = Math.ceil(((zAlpha + zBeta) / effectSize) ** 2);
1563
+ out += `- **Effect Size (h):** ${fmt(effectSize, 4)}\n`;
1564
+ out += `- **Alpha:** ${alpha}\n`;
1565
+ out += `- **Power:** ${power}\n\n`;
1566
+ out += `### Result\n\n`;
1567
+ out += `**Required sample size per group: ${n}**\n`;
1568
+ out += `**Total sample size: ${2 * n}**\n`;
1569
+ }
1570
+ else if (solveFor === 'power') {
1571
+ out += `- **Effect Size (h):** ${fmt(effectSize, 4)}\n`;
1572
+ out += `- **Alpha:** ${alpha}\n\n`;
1573
+ out += `### Power for Various Sample Sizes\n\n`;
1574
+ out += `| n (per group) | Total N | Power |\n|---|---|---|\n`;
1575
+ for (const n of [20, 50, 100, 200, 300, 500, 1000]) {
1576
+ const ncp = effectSize * Math.sqrt(n);
1577
+ const computedPower = 1 - normalCDF(zAlpha - ncp);
1578
+ out += `| ${n} | ${2 * n} | ${fmt(Math.min(computedPower, 0.999), 4)} |\n`;
1579
+ }
1580
+ }
1581
+ else {
1582
+ const zBeta = normalQuantile(power);
1583
+ out += `- **Alpha:** ${alpha}\n`;
1584
+ out += `- **Power:** ${power}\n\n`;
1585
+ out += `### Minimum Detectable Effect (h) for Various Sample Sizes\n\n`;
1586
+ out += `| n (per group) | Total N | Min Effect (h) | Interpretation |\n|---|---|---|---|\n`;
1587
+ for (const n of [20, 50, 100, 200, 300, 500, 1000]) {
1588
+ const h = (zAlpha + zBeta) / Math.sqrt(n);
1589
+ const interp = h >= 0.8 ? 'Large' : h >= 0.5 ? 'Medium' : h >= 0.2 ? 'Small' : 'Tiny';
1590
+ out += `| ${n} | ${2 * n} | ${fmt(h, 4)} | ${interp} |\n`;
1591
+ }
1592
+ }
1593
+ }
1594
+ else {
1595
+ return `**Error**: Unknown test_type "${testType}". Use: t_test, anova, chi_square, proportion.`;
1596
+ }
1597
+ out += `\n### Effect Size Guidelines\n\n`;
1598
+ out += `| Test | Small | Medium | Large |\n|---|---|---|---|\n`;
1599
+ out += `| t-test (d) | 0.20 | 0.50 | 0.80 |\n`;
1600
+ out += `| ANOVA (f) | 0.10 | 0.25 | 0.40 |\n`;
1601
+ out += `| Chi-square (w) | 0.10 | 0.30 | 0.50 |\n`;
1602
+ out += `| Proportion (h) | 0.20 | 0.50 | 0.80 |\n`;
1603
+ return out;
1604
+ },
1605
+ });
1606
// ── 8. ANOVA TEST ──
// One-way ANOVA with pairwise post-hoc comparisons, rendered as a markdown report.
registerTool({
    name: 'anova_test',
    description: 'One-way ANOVA with Tukey HSD and Bonferroni post-hoc tests. Returns F-statistic, p-value, eta-squared, and pairwise comparisons.',
    parameters: {
        groups: { type: 'string', description: 'Groups as JSON array of arrays (each inner array is one group\'s data)', required: true },
        test_type: { type: 'string', description: 'Test type: one_way', required: true },
        post_hoc: { type: 'string', description: 'Post-hoc test: tukey or bonferroni', required: true },
    },
    tier: 'free',
    /**
     * Run a one-way ANOVA over the parsed groups.
     *
     * @param {object} args - Tool arguments: `groups` (JSON string of number
     *   arrays), `test_type` (declared but not otherwise inspected — only
     *   one-way is implemented), `post_hoc` ('tukey' or 'bonferroni'; only
     *   affects the table heading, see NOTE below).
     * @returns {Promise<string>} Markdown report, or an '**Error**: …' string
     *   on invalid input.
     */
    async execute(args) {
        let groups;
        try {
            groups = JSON.parse(String(args.groups));
        }
        catch {
            return '**Error**: groups must be a valid JSON array of arrays.';
        }
        if (!Array.isArray(groups) || groups.length < 2) {
            return '**Error**: Need at least 2 groups.';
        }
        // FIX: inner elements were previously never validated, so a non-array
        // or non-numeric entry silently produced NaN throughout the report.
        if (!groups.every(g => Array.isArray(g) && g.length > 0
            && g.every(v => typeof v === 'number' && Number.isFinite(v)))) {
            return '**Error**: each group must be a non-empty array of finite numbers.';
        }
        const postHoc = String(args.post_hoc).toLowerCase();
        const k = groups.length; // number of groups
        const ns = groups.map(g => g.length); // per-group sample sizes
        const N = sum(ns); // total observations
        const groupMeans = groups.map(g => mean(g));
        const grandMean = mean(groups.flat());
        // Sum of squares: between-group (treatment) and within-group (error).
        let ssBetween = 0;
        for (let i = 0; i < k; i++) {
            ssBetween += ns[i] * (groupMeans[i] - grandMean) ** 2;
        }
        let ssWithin = 0;
        for (let i = 0; i < k; i++) {
            for (const val of groups[i]) {
                ssWithin += (val - groupMeans[i]) ** 2;
            }
        }
        const ssTotal = ssBetween + ssWithin;
        const dfBetween = k - 1;
        const dfWithin = N - k;
        // FIX: with exactly one observation per group dfWithin is 0 and every
        // later division produced NaN; reject instead of emitting a garbage table.
        if (dfWithin < 1) {
            return '**Error**: Not enough observations — total N must exceed the number of groups.';
        }
        const msBetween = ssBetween / dfBetween;
        const msWithin = ssWithin / dfWithin;
        const fStat = msWithin > 0 ? msBetween / msWithin : 0;
        const pValue = 1 - fCDF(fStat, dfBetween, dfWithin);
        // Effect sizes: eta² (proportion of variance explained) and the
        // less-biased omega² (can be negative for tiny effects; clamped at display).
        const etaSquared = ssTotal > 0 ? ssBetween / ssTotal : 0;
        const omegaSquared = (ssBetween - dfBetween * msWithin) / (ssTotal + msWithin);
        let out = `## One-Way ANOVA\n\n`;
        out += `### Group Descriptives\n\n`;
        out += `| Group | N | Mean | Std Dev |\n|---|---|---|---|\n`;
        groups.forEach((g, i) => {
            out += `| ${i + 1} | ${g.length} | ${fmt(groupMeans[i], 4)} | ${fmt(stddev(g), 4)} |\n`;
        });
        out += `| **Total** | **${N}** | **${fmt(grandMean, 4)}** | **${fmt(stddev(groups.flat()), 4)}** |\n`;
        out += `\n### ANOVA Table\n\n`;
        out += `| Source | SS | df | MS | F | p-value |\n|---|---|---|---|---|---|\n`;
        out += `| Between | ${fmt(ssBetween, 4)} | ${dfBetween} | ${fmt(msBetween, 4)} | ${fmt(fStat, 4)} | ${pValue < 0.001 ? '<0.001' : fmt(pValue, 4)} |\n`;
        out += `| Within | ${fmt(ssWithin, 4)} | ${dfWithin} | ${fmt(msWithin, 4)} | | |\n`;
        out += `| Total | ${fmt(ssTotal, 4)} | ${N - 1} | | | |\n`;
        out += `\n### Effect Sizes\n\n`;
        out += `| Measure | Value | Interpretation |\n|---|---|---|\n`;
        // Conventional eta² benchmarks: .06 medium, .14 large.
        const etaInterp = etaSquared >= 0.14 ? 'Large' : etaSquared >= 0.06 ? 'Medium' : 'Small';
        out += `| Eta-squared (eta²) | ${fmt(etaSquared, 4)} | ${etaInterp} |\n`;
        out += `| Omega-squared (omega²) | ${fmt(Math.max(0, omegaSquared), 4)} | — |\n`;
        // Post-hoc pairwise comparisons.
        // NOTE: both the 'tukey' and 'bonferroni' paths apply the same
        // Bonferroni-style multiplicity correction (raw two-sided t p-value
        // times the number of comparisons, capped at 1). The original code had
        // two branches computing identical values; they are merged here. A true
        // Tukey HSD would use the Studentized range distribution — this
        // approximation is conservative.
        out += `\n### Post-Hoc: ${postHoc === 'tukey' ? 'Tukey HSD' : 'Bonferroni'}\n\n`;
        out += `| Comparison | Diff | SE | Statistic | p-value | Significant |\n|---|---|---|---|---|---|\n`;
        const nComparisons = k * (k - 1) / 2;
        for (let i = 0; i < k; i++) {
            for (let j = i + 1; j < k; j++) {
                const diff = groupMeans[i] - groupMeans[j];
                // Pooled standard error using the within-group mean square.
                const se = Math.sqrt(msWithin * (1 / ns[i] + 1 / ns[j]));
                const stat = Math.abs(diff) / se;
                const rawP = 2 * (1 - tCDF(stat, dfWithin));
                const pVal = Math.min(1, rawP * nComparisons);
                const sig = pVal < 0.001 ? '***' : pVal < 0.01 ? '**' : pVal < 0.05 ? '*' : 'ns';
                out += `| G${i + 1} vs G${j + 1} | ${fmt(diff, 4)} | ${fmt(se, 4)} | ${fmt(stat, 4)} | ${pVal < 0.001 ? '<0.001' : fmt(pVal, 4)} | ${sig} |\n`;
            }
        }
        out += `\nSignificance: \\*\\*\\* p<0.001, \\*\\* p<0.01, \\* p<0.05, ns = not significant\n`;
        return out;
    },
});
1702
// ── 9. SURVIVAL ANALYSIS ──
// Kaplan-Meier estimation (single group or per-group) plus an approximate
// log-rank test, rendered as a markdown report.
registerTool({
    name: 'survival_analysis',
    description: 'Kaplan-Meier survival curves with log-rank test. Returns survival probabilities at each time point, median survival, hazard ratios, and p-value.',
    parameters: {
        times: { type: 'string', description: 'Event/censoring times (comma-separated numbers)', required: true },
        events: { type: 'string', description: 'Event indicators (comma-separated: 1=event, 0=censored)', required: true },
        groups: { type: 'string', description: 'Group labels for each subject (comma-separated, optional for comparing 2+ groups)' },
    },
    tier: 'free',
    /**
     * Compute Kaplan-Meier survival estimates and, when group labels are
     * supplied, a per-group comparison with a log-rank test.
     *
     * @param {object} args - `times` and `events` are parallel comma-separated
     *   lists (parseCSV presumably yields a number[] — not shown here); any
     *   event value other than the number 1 is treated as censored. `groups`
     *   (optional) must supply one label per subject.
     * @returns {Promise<string>} Markdown report, or an '**Error**: …' string
     *   on invalid input.
     */
    async execute(args) {
        const times = parseCSV(String(args.times));
        // Coerce indicators to strict 0/1: only an exact numeric 1 counts as
        // an event; everything else is censored.
        const events = parseCSV(String(args.events)).map(v => v === 1 ? 1 : 0);
        if (times.length !== events.length || times.length < 2) {
            return '**Error**: times and events must have equal length (minimum 2).';
        }
        // groups is optional; null means single-group analysis.
        const groupLabels = args.groups
            ? String(args.groups).split(',').map(s => s.trim())
            : null;
        const n = times.length;
        // Kaplan-Meier estimator for a single group
        // Returns the product-limit survival curve with Greenwood variances
        // and the median survival time (first time S(t) drops to <= 0.5).
        function kaplanMeier(t, e) {
            // Sort by time
            // (sort indices, not the arrays, so t/e stay paired and unmutated)
            const indices = t.map((_, i) => i).sort((a, b) => t[a] - t[b]);
            const sortedT = indices.map(i => t[i]);
            const sortedE = indices.map(i => e[i]);
            // Get unique event times
            // Run-length pass over the sorted times: count events and
            // censorings that share each distinct time value.
            const uniqueTimes = [];
            const eventCounts = [];
            const censorCounts = [];
            let i = 0;
            while (i < sortedT.length) {
                const currentTime = sortedT[i];
                let nEvents = 0, nCensored = 0;
                while (i < sortedT.length && sortedT[i] === currentTime) {
                    if (sortedE[i] === 1)
                        nEvents++;
                    else
                        nCensored++;
                    i++;
                }
                if (nEvents > 0 || nCensored > 0) {
                    uniqueTimes.push(currentTime);
                    eventCounts.push(nEvents);
                    censorCounts.push(nCensored);
                }
            }
            // Product-limit estimator
            // Curves start at (t=0, S=1) with everyone at risk; rows are only
            // appended at times where at least one event occurs (censoring
            // alone reduces the risk set but adds no table row).
            const survTimes = [0];
            const survProbs = [1.0];
            const nRisk = [t.length];
            const nEvent = [0];
            const variances = [0];
            let atRisk = t.length;
            let survProb = 1.0;
            let greenwoodSum = 0;
            for (let j = 0; j < uniqueTimes.length; j++) {
                const d = eventCounts[j];
                const c = censorCounts[j];
                if (d > 0) {
                    // S(t) *= (n_j - d_j) / n_j — the KM product-limit step.
                    survProb *= (atRisk - d) / atRisk;
                    if (atRisk > d) {
                        // Skipped when atRisk === d (S hits 0; term undefined).
                        greenwoodSum += d / (atRisk * (atRisk - d));
                    }
                    survTimes.push(uniqueTimes[j]);
                    survProbs.push(survProb);
                    nRisk.push(atRisk);
                    nEvent.push(d);
                    variances.push(survProb * survProb * greenwoodSum); // Greenwood's formula
                }
                // Subjects censored at time t are counted as at risk at t,
                // then removed along with the events for the next step.
                atRisk -= d + c;
            }
            // Median survival: first time S(t) <= 0.5
            // Stays null ("not reached") if the curve never drops that far.
            let medianSurvival = null;
            for (let j = 1; j < survProbs.length; j++) {
                if (survProbs[j] <= 0.5) {
                    medianSurvival = survTimes[j];
                    break;
                }
            }
            return {
                times: survTimes,
                survival: survProbs,
                nRisk,
                nEvent,
                variance: variances,
                medianSurvival,
            };
        }
        let out = `## Survival Analysis — Kaplan-Meier\n\n`;
        if (!groupLabels) {
            // Single group
            const km = kaplanMeier(times, events);
            const totalEvents = events.filter(e => e === 1).length;
            out += `**N = ${n}** | Events: ${totalEvents} | Censored: ${n - totalEvents}\n\n`;
            out += `### Survival Table\n\n`;
            out += `| Time | N at Risk | Events | S(t) | 95% CI |\n|---|---|---|---|---|\n`;
            for (let i = 0; i < km.times.length; i++) {
                // Normal-approximation 95% CI from the Greenwood variance,
                // clamped to the valid probability range [0, 1].
                const se = Math.sqrt(km.variance[i]);
                const ciLower = Math.max(0, km.survival[i] - 1.96 * se);
                const ciUpper = Math.min(1, km.survival[i] + 1.96 * se);
                out += `| ${fmt(km.times[i], 2)} | ${km.nRisk[i]} | ${km.nEvent[i]} | ${fmt(km.survival[i], 4)} | [${fmt(ciLower, 4)}, ${fmt(ciUpper, 4)}] |\n`;
            }
            out += `\n**Median Survival:** ${km.medianSurvival !== null ? fmt(km.medianSurvival, 2) : 'Not reached'}\n`;
        }
        else {
            // Multiple groups — KM per group + log-rank test
            if (groupLabels.length !== n) {
                return `**Error**: groups length (${groupLabels.length}) must match times length (${n}).`;
            }
            // Partition subjects by label, preserving input order.
            const uniqueGroups = [...new Set(groupLabels)];
            const groupData = {};
            for (const g of uniqueGroups) {
                groupData[g] = { times: [], events: [] };
            }
            for (let i = 0; i < n; i++) {
                groupData[groupLabels[i]].times.push(times[i]);
                groupData[groupLabels[i]].events.push(events[i]);
            }
            // KM for each group
            const kmResults = {};
            for (const g of uniqueGroups) {
                kmResults[g] = kaplanMeier(groupData[g].times, groupData[g].events);
            }
            // Log-rank test
            // Get all unique event times across all groups
            // (censoring-only times contribute nothing to O or E)
            const allEventTimes = [...new Set(times.filter((_, i) => events[i] === 1))].sort((a, b) => a - b);
            let chiSq = 0;
            const observed = {};
            const expected = {};
            for (const g of uniqueGroups) {
                observed[g] = 0;
                expected[g] = 0;
            }
            for (const t of allEventTimes) {
                // At each event time, count at-risk and events per group
                const atRiskPerGroup = {};
                const eventsPerGroup = {};
                let totalAtRisk = 0;
                let totalEvents = 0;
                for (const g of uniqueGroups) {
                    const gd = groupData[g];
                    let risk = 0, ev = 0;
                    for (let i = 0; i < gd.times.length; i++) {
                        // At risk = still under observation at time t
                        // (>= includes subjects whose event/censoring is at t).
                        if (gd.times[i] >= t)
                            risk++;
                        if (gd.times[i] === t && gd.events[i] === 1)
                            ev++;
                    }
                    atRiskPerGroup[g] = risk;
                    eventsPerGroup[g] = ev;
                    totalAtRisk += risk;
                    totalEvents += ev;
                }
                if (totalAtRisk === 0)
                    continue;
                for (const g of uniqueGroups) {
                    // Expected events under H0: group's share of the risk set
                    // times the total events at this time.
                    observed[g] += eventsPerGroup[g];
                    expected[g] += (atRiskPerGroup[g] / totalAtRisk) * totalEvents;
                }
            }
            // Log-rank chi-square = sum((O-E)^2 / E) with df = k-1
            // NOTE(review): this is the simple O/E approximation; the exact
            // log-rank statistic uses the hypergeometric variance of O-E.
            for (const g of uniqueGroups) {
                if (expected[g] > 0) {
                    chiSq += (observed[g] - expected[g]) ** 2 / expected[g];
                }
            }
            const lrDf = uniqueGroups.length - 1;
            const lrPValue = 1 - chiSquareCDF(chiSq, lrDf);
            // Hazard ratio (for 2 groups): HR = (O1/E1) / (O2/E2)
            // Only reported for exactly two groups with nonzero expectations.
            let hazardRatio = null;
            if (uniqueGroups.length === 2) {
                const g1 = uniqueGroups[0], g2 = uniqueGroups[1];
                if (expected[g1] > 0 && expected[g2] > 0) {
                    hazardRatio = (observed[g1] / expected[g1]) / (observed[g2] / expected[g2]);
                }
            }
            out += `### Group Summary\n\n`;
            out += `| Group | N | Events | Censored | Median Survival |\n|---|---|---|---|---|\n`;
            for (const g of uniqueGroups) {
                const gd = groupData[g];
                const nEvents = gd.events.filter(e => e === 1).length;
                out += `| ${g} | ${gd.times.length} | ${nEvents} | ${gd.times.length - nEvents} | ${kmResults[g].medianSurvival !== null ? fmt(kmResults[g].medianSurvival, 2) : 'NR'} |\n`;
            }
            out += `\n### Log-Rank Test\n\n`;
            out += `| Statistic | Value |\n|---|---|\n`;
            out += `| Chi-square | ${fmt(chiSq, 4)} |\n`;
            out += `| df | ${lrDf} |\n`;
            out += `| p-value | ${lrPValue < 0.001 ? '<0.001' : fmt(lrPValue, 4)} |\n`;
            if (hazardRatio !== null) {
                out += `| Hazard Ratio (${uniqueGroups[0]} vs ${uniqueGroups[1]}) | ${fmt(hazardRatio, 4)} |\n`;
            }
            out += `\n### Observed vs Expected Events\n\n`;
            out += `| Group | Observed | Expected | O/E |\n|---|---|---|---|\n`;
            for (const g of uniqueGroups) {
                out += `| ${g} | ${fmt(observed[g], 1)} | ${fmt(expected[g], 2)} | ${fmt(expected[g] > 0 ? observed[g] / expected[g] : 0, 4)} |\n`;
            }
            // Survival table for each group
            for (const g of uniqueGroups) {
                const km = kmResults[g];
                out += `\n### Survival Table — ${g}\n\n`;
                out += `| Time | N at Risk | Events | S(t) |\n|---|---|---|---|\n`;
                for (let i = 0; i < km.times.length; i++) {
                    out += `| ${fmt(km.times[i], 2)} | ${km.nRisk[i]} | ${km.nEvent[i]} | ${fmt(km.survival[i], 4)} |\n`;
                }
            }
        }
        return out;
    },
});
1912
+ // ── 10. VIZ CODEGEN ──
1913
+ registerTool({
1914
+ name: 'viz_codegen',
1915
+ description: 'Generate publication-quality plot code in Python (matplotlib/seaborn) or R (ggplot2). Supports: scatter, histogram, heatmap, boxplot, violin, line, bar, kaplan_meier, forest, volcano, qq plot types.',
1916
+ parameters: {
1917
+ chart_type: { type: 'string', description: 'Chart type: scatter, histogram, heatmap, boxplot, violin, line, bar, kaplan_meier, forest, volcano, qq', required: true },
1918
+ language: { type: 'string', description: 'Output language: python or r', required: true },
1919
+ data_description: { type: 'string', description: 'Description of the data (variable names, types, context)', required: true },
1920
+ title: { type: 'string', description: 'Plot title', required: true },
1921
+ style: { type: 'string', description: 'Style: publication or presentation (default publication)' },
1922
+ },
1923
+ tier: 'free',
1924
+ async execute(args) {
1925
+ const chartType = String(args.chart_type).toLowerCase().replace(/[_\s-]/g, '_');
1926
+ const language = String(args.language).toLowerCase();
1927
+ const dataDesc = String(args.data_description);
1928
+ const title = String(args.title);
1929
+ const style = String(args.style || 'publication').toLowerCase();
1930
+ if (language !== 'python' && language !== 'r') {
1931
+ return `**Error**: language must be "python" or "r".`;
1932
+ }
1933
+ const isPub = style === 'publication';
1934
+ let code = '';
1935
+ if (language === 'python') {
1936
+ // Python: matplotlib + seaborn
1937
+ const preamble = [
1938
+ `import numpy as np`,
1939
+ `import pandas as pd`,
1940
+ `import matplotlib.pyplot as plt`,
1941
+ `import seaborn as sns`,
1942
+ `from matplotlib import rcParams`,
1943
+ ``,
1944
+ `# ${isPub ? 'Publication' : 'Presentation'} style`,
1945
+ isPub
1946
+ ? `plt.style.use('seaborn-v0_8-whitegrid')\nrcParams.update({'font.family': 'serif', 'font.serif': ['Times New Roman'], 'font.size': 10, 'axes.labelsize': 11, 'axes.titlesize': 12, 'figure.dpi': 300, 'savefig.dpi': 300, 'figure.figsize': (6, 4)})`
1947
+ : `plt.style.use('seaborn-v0_8-darkgrid')\nrcParams.update({'font.family': 'sans-serif', 'font.size': 14, 'axes.labelsize': 16, 'axes.titlesize': 18, 'figure.dpi': 150, 'figure.figsize': (10, 7)})`,
1948
+ ``,
1949
+ `# --- Data: ${dataDesc} ---`,
1950
+ `# Replace with your actual data`,
1951
+ ].join('\n');
1952
+ if (chartType === 'scatter') {
1953
+ code = `${preamble}
1954
+ x = np.random.randn(100)
1955
+ y = 2.5 * x + np.random.randn(100) * 0.5
1956
+
1957
+ fig, ax = plt.subplots()
1958
+ scatter = ax.scatter(x, y, c='steelblue', alpha=0.7, edgecolors='white', s=50)
1959
+
1960
+ # Regression line
1961
+ z = np.polyfit(x, y, 1)
1962
+ p = np.poly1d(z)
1963
+ x_line = np.linspace(x.min(), x.max(), 100)
1964
+ ax.plot(x_line, p(x_line), 'r--', linewidth=1.5, label=f'y = {z[0]:.2f}x + {z[1]:.2f}')
1965
+
1966
+ ax.set_xlabel('X variable')
1967
+ ax.set_ylabel('Y variable')
1968
+ ax.set_title('${title}')
1969
+ ax.legend(frameon=True)
1970
+ plt.tight_layout()
1971
+ plt.savefig('scatter_plot.png', bbox_inches='tight')
1972
+ plt.show()`;
1973
+ }
1974
+ else if (chartType === 'histogram') {
1975
+ code = `${preamble}
1976
+ data = np.random.randn(500)
1977
+
1978
+ fig, ax = plt.subplots()
1979
+ n, bins, patches = ax.hist(data, bins=30, color='steelblue', edgecolor='white',
1980
+ alpha=0.8, density=True)
1981
+
1982
+ # Overlay normal curve
1983
+ from scipy import stats
1984
+ xmin, xmax = ax.get_xlim()
1985
+ x = np.linspace(xmin, xmax, 100)
1986
+ mu, std = stats.norm.fit(data)
1987
+ ax.plot(x, stats.norm.pdf(x, mu, std), 'r-', linewidth=2,
1988
+ label=f'Normal fit (mu={mu:.2f}, sigma={std:.2f})')
1989
+
1990
+ ax.set_xlabel('Value')
1991
+ ax.set_ylabel('Density')
1992
+ ax.set_title('${title}')
1993
+ ax.legend(frameon=True)
1994
+ plt.tight_layout()
1995
+ plt.savefig('histogram.png', bbox_inches='tight')
1996
+ plt.show()`;
1997
+ }
1998
+ else if (chartType === 'heatmap') {
1999
+ code = `${preamble}
2000
+ data = np.random.randn(10, 10)
2001
+ labels = [f'Var{i+1}' for i in range(10)]
2002
+
2003
+ fig, ax = plt.subplots(figsize=(8, 6))
2004
+ im = sns.heatmap(data, annot=True, fmt='.2f', cmap='RdBu_r', center=0,
2005
+ xticklabels=labels, yticklabels=labels,
2006
+ linewidths=0.5, ax=ax, cbar_kws={'shrink': 0.8})
2007
+ ax.set_title('${title}')
2008
+ plt.tight_layout()
2009
+ plt.savefig('heatmap.png', bbox_inches='tight')
2010
+ plt.show()`;
2011
+ }
2012
+ else if (chartType === 'boxplot') {
2013
+ code = `${preamble}
2014
+ groups = ['Group A', 'Group B', 'Group C', 'Group D']
2015
+ data = [np.random.randn(50) + i for i, _ in enumerate(groups)]
2016
+ df = pd.DataFrame({g: pd.Series(d) for g, d in zip(groups, data)})
2017
+ df_melt = df.melt(var_name='Group', value_name='Value')
2018
+
2019
+ fig, ax = plt.subplots()
2020
+ bp = sns.boxplot(data=df_melt, x='Group', y='Value', palette='Set2',
2021
+ width=0.6, flierprops=dict(marker='o', markersize=4), ax=ax)
2022
+ sns.stripplot(data=df_melt, x='Group', y='Value', color='black',
2023
+ alpha=0.3, size=3, jitter=True, ax=ax)
2024
+
2025
+ ax.set_xlabel('')
2026
+ ax.set_ylabel('Value')
2027
+ ax.set_title('${title}')
2028
+ plt.tight_layout()
2029
+ plt.savefig('boxplot.png', bbox_inches='tight')
2030
+ plt.show()`;
2031
+ }
2032
+ else if (chartType === 'violin') {
2033
+ code = `${preamble}
2034
+ groups = ['Group A', 'Group B', 'Group C']
2035
+ data = [np.random.randn(100) + i * 0.5 for i, _ in enumerate(groups)]
2036
+ df = pd.DataFrame({g: pd.Series(d) for g, d in zip(groups, data)})
2037
+ df_melt = df.melt(var_name='Group', value_name='Value')
2038
+
2039
+ fig, ax = plt.subplots()
2040
+ sns.violinplot(data=df_melt, x='Group', y='Value', palette='muted',
2041
+ inner='box', linewidth=1, ax=ax)
2042
+
2043
+ ax.set_xlabel('')
2044
+ ax.set_ylabel('Value')
2045
+ ax.set_title('${title}')
2046
+ plt.tight_layout()
2047
+ plt.savefig('violin_plot.png', bbox_inches='tight')
2048
+ plt.show()`;
2049
+ }
2050
+ else if (chartType === 'line') {
2051
+ code = `${preamble}
2052
+ x = np.arange(0, 50)
2053
+ y1 = np.cumsum(np.random.randn(50)) + 10
2054
+ y2 = np.cumsum(np.random.randn(50)) + 10
2055
+
2056
+ fig, ax = plt.subplots()
2057
+ ax.plot(x, y1, '-o', color='steelblue', markersize=3, linewidth=1.5, label='Series A')
2058
+ ax.plot(x, y2, '-s', color='coral', markersize=3, linewidth=1.5, label='Series B')
2059
+ ax.fill_between(x, y1 - 1, y1 + 1, alpha=0.15, color='steelblue')
2060
+ ax.fill_between(x, y2 - 1, y2 + 1, alpha=0.15, color='coral')
2061
+
2062
+ ax.set_xlabel('Time')
2063
+ ax.set_ylabel('Value')
2064
+ ax.set_title('${title}')
2065
+ ax.legend(frameon=True)
2066
+ plt.tight_layout()
2067
+ plt.savefig('line_plot.png', bbox_inches='tight')
2068
+ plt.show()`;
2069
+ }
2070
+ else if (chartType === 'bar') {
2071
+ code = `${preamble}
2072
+ categories = ['Cat A', 'Cat B', 'Cat C', 'Cat D', 'Cat E']
2073
+ values = [23, 45, 56, 78, 32]
2074
+ errors = [3, 5, 4, 6, 3]
2075
+
2076
+ fig, ax = plt.subplots()
2077
+ bars = ax.bar(categories, values, yerr=errors, capsize=4,
2078
+ color='steelblue', edgecolor='white', alpha=0.85)
2079
+
2080
+ # Add value labels
2081
+ for bar, val in zip(bars, values):
2082
+ ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 2,
2083
+ str(val), ha='center', va='bottom', fontsize=9)
2084
+
2085
+ ax.set_ylabel('Value')
2086
+ ax.set_title('${title}')
2087
+ ax.spines['top'].set_visible(False)
2088
+ ax.spines['right'].set_visible(False)
2089
+ plt.tight_layout()
2090
+ plt.savefig('bar_plot.png', bbox_inches='tight')
2091
+ plt.show()`;
2092
+ }
2093
+ else if (chartType === 'kaplan_meier') {
2094
+ code = `${preamble}
2095
+ from lifelines import KaplanMeierFitter
2096
+ from lifelines.statistics import logrank_test
2097
+
2098
+ # Group 1
2099
+ T1 = np.array([6, 6, 6, 7, 10, 13, 16, 22, 23, 6, 9, 10, 11, 17, 19, 20, 25, 32, 32, 34])
2100
+ E1 = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
2101
+
2102
+ # Group 2
2103
+ T2 = np.array([1, 1, 2, 2, 3, 4, 4, 5, 5, 8, 8, 8, 8, 11, 11, 12, 12, 15, 17, 22])
2104
+ E2 = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0])
2105
+
2106
+ fig, ax = plt.subplots()
2107
+ kmf1 = KaplanMeierFitter()
2108
+ kmf1.fit(T1, E1, label='Treatment')
2109
+ kmf1.plot_survival_function(ax=ax, ci_show=True, color='steelblue')
2110
+
2111
+ kmf2 = KaplanMeierFitter()
2112
+ kmf2.fit(T2, E2, label='Control')
2113
+ kmf2.plot_survival_function(ax=ax, ci_show=True, color='coral')
2114
+
2115
+ # Log-rank test
2116
+ result = logrank_test(T1, T2, E1, E2)
2117
+ ax.text(0.6, 0.9, f'Log-rank p = {result.p_value:.4f}',
2118
+ transform=ax.transAxes, fontsize=10,
2119
+ bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
2120
+
2121
+ ax.set_xlabel('Time')
2122
+ ax.set_ylabel('Survival Probability')
2123
+ ax.set_title('${title}')
2124
+ ax.set_ylim(0, 1.05)
2125
+ ax.legend(frameon=True)
2126
+ plt.tight_layout()
2127
+ plt.savefig('km_plot.png', bbox_inches='tight')
2128
+ plt.show()`;
2129
+ }
2130
+ else if (chartType === 'forest') {
2131
+ code = `${preamble}
2132
+ studies = ['Study A', 'Study B', 'Study C', 'Study D', 'Study E', 'Overall']
2133
+ effects = [0.85, 1.12, 0.72, 0.95, 1.05, 0.92]
2134
+ ci_lower = [0.65, 0.88, 0.55, 0.75, 0.82, 0.80]
2135
+ ci_upper = [1.10, 1.42, 0.94, 1.20, 1.34, 1.06]
2136
+ weights = [20, 25, 15, 22, 18, None]
2137
+
2138
+ fig, ax = plt.subplots(figsize=(8, 5))
2139
+ y_pos = np.arange(len(studies))
2140
+
2141
+ for i, (study, eff, lo, hi) in enumerate(zip(studies, effects, ci_lower, ci_upper)):
2142
+ color = 'darkred' if study == 'Overall' else 'steelblue'
2143
+ marker = 'D' if study == 'Overall' else 'o'
2144
+ size = 10 if study == 'Overall' else 7
2145
+ ax.plot(eff, i, marker, color=color, markersize=size, zorder=3)
2146
+ ax.hlines(i, lo, hi, color=color, linewidth=2)
2147
+ label = f'{eff:.2f} [{lo:.2f}, {hi:.2f}]'
2148
+ ax.text(max(ci_upper) + 0.1, i, label, va='center', fontsize=9)
2149
+
2150
+ ax.axvline(1.0, color='gray', linestyle='--', linewidth=0.8)
2151
+ ax.set_yticks(y_pos)
2152
+ ax.set_yticklabels(studies)
2153
+ ax.set_xlabel('Effect Size (OR / HR)')
2154
+ ax.set_title('${title}')
2155
+ ax.invert_yaxis()
2156
+ ax.spines['top'].set_visible(False)
2157
+ ax.spines['right'].set_visible(False)
2158
+ plt.tight_layout()
2159
+ plt.savefig('forest_plot.png', bbox_inches='tight')
2160
+ plt.show()`;
2161
+ }
2162
+ else if (chartType === 'volcano') {
2163
+ code = `${preamble}
2164
+ np.random.seed(42)
2165
+ n_genes = 5000
2166
+ log2fc = np.random.randn(n_genes) * 1.5
2167
+ pvals = 10 ** (-np.abs(np.random.randn(n_genes) * 2))
2168
+ neg_log10p = -np.log10(pvals)
2169
+
2170
+ # Categorize
2171
+ fc_threshold = 1.0
2172
+ p_threshold = 0.05
2173
+ colors = []
2174
+ for fc, p in zip(log2fc, pvals):
2175
+ if abs(fc) > fc_threshold and p < p_threshold:
2176
+ colors.append('red' if fc > 0 else 'blue')
2177
+ else:
2178
+ colors.append('gray')
2179
+
2180
+ fig, ax = plt.subplots()
2181
+ ax.scatter(log2fc, neg_log10p, c=colors, alpha=0.5, s=10, edgecolors='none')
2182
+ ax.axhline(-np.log10(p_threshold), color='gray', linestyle='--', linewidth=0.8)
2183
+ ax.axvline(-fc_threshold, color='gray', linestyle='--', linewidth=0.8)
2184
+ ax.axvline(fc_threshold, color='gray', linestyle='--', linewidth=0.8)
2185
+
2186
+ n_up = sum(1 for c in colors if c == 'red')
2187
+ n_down = sum(1 for c in colors if c == 'blue')
2188
+ ax.text(0.02, 0.98, f'Up: {n_up}\\nDown: {n_down}', transform=ax.transAxes,
2189
+ va='top', fontsize=9, bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
2190
+
2191
+ ax.set_xlabel('log2 Fold Change')
2192
+ ax.set_ylabel('-log10(p-value)')
2193
+ ax.set_title('${title}')
2194
+ plt.tight_layout()
2195
+ plt.savefig('volcano_plot.png', bbox_inches='tight')
2196
+ plt.show()`;
2197
+ }
2198
+ else if (chartType === 'qq') {
2199
+ code = `${preamble}
2200
+ from scipy import stats
2201
+
2202
+ data = np.random.randn(200) * 2 + 5 # Replace with your data
2203
+
2204
+ fig, ax = plt.subplots()
2205
+ (osm, osr), (slope, intercept, r) = stats.probplot(data, dist='norm', plot=ax)
2206
+ ax.get_lines()[0].set(color='steelblue', markersize=4, alpha=0.7)
2207
+ ax.get_lines()[1].set(color='red', linewidth=1.5)
2208
+
2209
+ ax.set_title('${title}')
2210
+ ax.text(0.05, 0.95, f'R² = {r**2:.4f}', transform=ax.transAxes,
2211
+ va='top', fontsize=10, bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
2212
+ plt.tight_layout()
2213
+ plt.savefig('qq_plot.png', bbox_inches='tight')
2214
+ plt.show()`;
2215
+ }
2216
+ else {
2217
+ return `**Error**: Unknown chart_type "${chartType}". Supported: scatter, histogram, heatmap, boxplot, violin, line, bar, kaplan_meier, forest, volcano, qq.`;
2218
+ }
2219
+ }
2220
+ else {
2221
+ // R: ggplot2
2222
+ const rPreamble = [
2223
+ `library(ggplot2)`,
2224
+ `library(dplyr)`,
2225
+ ``,
2226
+ `# ${isPub ? 'Publication' : 'Presentation'} theme`,
2227
+ isPub
2228
+ ? `theme_pub <- theme_bw() + theme(text = element_text(family = "serif", size = 10), plot.title = element_text(size = 12, face = "bold"), axis.title = element_text(size = 11), legend.position = "bottom")`
2229
+ : `theme_pub <- theme_minimal() + theme(text = element_text(size = 14), plot.title = element_text(size = 18, face = "bold"), axis.title = element_text(size = 16), legend.position = "bottom")`,
2230
+ ``,
2231
+ `# --- Data: ${dataDesc} ---`,
2232
+ `# Replace with your actual data`,
2233
+ ].join('\n');
2234
+ if (chartType === 'scatter') {
2235
+ code = `${rPreamble}
2236
+ set.seed(42)
2237
+ df <- data.frame(x = rnorm(100), y = 2.5 * rnorm(100) + rnorm(100) * 0.5)
2238
+
2239
+ p <- ggplot(df, aes(x = x, y = y)) +
2240
+ geom_point(color = "steelblue", alpha = 0.7, size = 2) +
2241
+ geom_smooth(method = "lm", color = "red", linetype = "dashed", se = TRUE, alpha = 0.2) +
2242
+ labs(title = "${title}", x = "X variable", y = "Y variable") +
2243
+ theme_pub
2244
+
2245
+ ggsave("scatter_plot.pdf", p, width = 6, height = 4)
2246
+ print(p)`;
2247
+ }
2248
+ else if (chartType === 'histogram') {
2249
+ code = `${rPreamble}
2250
+ set.seed(42)
2251
+ df <- data.frame(value = rnorm(500))
2252
+
2253
+ p <- ggplot(df, aes(x = value)) +
2254
+ geom_histogram(aes(y = after_stat(density)), bins = 30, fill = "steelblue",
2255
+ color = "white", alpha = 0.8) +
2256
+ stat_function(fun = dnorm, args = list(mean = mean(df$value), sd = sd(df$value)),
2257
+ color = "red", linewidth = 1) +
2258
+ labs(title = "${title}", x = "Value", y = "Density") +
2259
+ theme_pub
2260
+
2261
+ ggsave("histogram.pdf", p, width = 6, height = 4)
2262
+ print(p)`;
2263
+ }
2264
+ else if (chartType === 'heatmap') {
2265
+ code = `${rPreamble}
2266
+ library(reshape2)
2267
+
2268
+ set.seed(42)
2269
+ mat <- matrix(rnorm(100), nrow = 10)
2270
+ colnames(mat) <- paste0("Var", 1:10)
2271
+ rownames(mat) <- paste0("Var", 1:10)
2272
+ df <- melt(mat)
2273
+
2274
+ p <- ggplot(df, aes(x = Var2, y = Var1, fill = value)) +
2275
+ geom_tile(color = "white") +
2276
+ geom_text(aes(label = round(value, 2)), size = 2.5) +
2277
+ scale_fill_gradient2(low = "steelblue", mid = "white", high = "coral", midpoint = 0) +
2278
+ labs(title = "${title}", x = "", y = "", fill = "Value") +
2279
+ theme_pub + theme(axis.text.x = element_text(angle = 45, hjust = 1))
2280
+
2281
+ ggsave("heatmap.pdf", p, width = 8, height = 6)
2282
+ print(p)`;
2283
+ }
2284
+ else if (chartType === 'boxplot') {
2285
+ code = `${rPreamble}
2286
+ set.seed(42)
2287
+ df <- data.frame(
2288
+ Group = rep(c("A", "B", "C", "D"), each = 50),
2289
+ Value = c(rnorm(50), rnorm(50, 1), rnorm(50, 2), rnorm(50, 3))
2290
+ )
2291
+
2292
+ p <- ggplot(df, aes(x = Group, y = Value, fill = Group)) +
2293
+ geom_boxplot(width = 0.6, outlier.shape = 1, alpha = 0.8) +
2294
+ geom_jitter(width = 0.15, alpha = 0.3, size = 1) +
2295
+ scale_fill_brewer(palette = "Set2") +
2296
+ labs(title = "${title}", x = "", y = "Value") +
2297
+ theme_pub + theme(legend.position = "none")
2298
+
2299
+ ggsave("boxplot.pdf", p, width = 6, height = 4)
2300
+ print(p)`;
2301
+ }
2302
+ else if (chartType === 'violin') {
2303
+ code = `${rPreamble}
2304
+ set.seed(42)
2305
+ df <- data.frame(
2306
+ Group = rep(c("A", "B", "C"), each = 100),
2307
+ Value = c(rnorm(100), rnorm(100, 0.5), rnorm(100, 1))
2308
+ )
2309
+
2310
+ p <- ggplot(df, aes(x = Group, y = Value, fill = Group)) +
2311
+ geom_violin(trim = FALSE, alpha = 0.8) +
2312
+ geom_boxplot(width = 0.1, fill = "white", outlier.shape = NA) +
2313
+ scale_fill_brewer(palette = "Pastel1") +
2314
+ labs(title = "${title}", x = "", y = "Value") +
2315
+ theme_pub + theme(legend.position = "none")
2316
+
2317
+ ggsave("violin_plot.pdf", p, width = 6, height = 4)
2318
+ print(p)`;
2319
+ }
2320
+ else if (chartType === 'line') {
2321
+ code = `${rPreamble}
2322
+ set.seed(42)
2323
+ df <- data.frame(
2324
+ Time = rep(1:50, 2),
2325
+ Value = c(cumsum(rnorm(50)) + 10, cumsum(rnorm(50)) + 10),
2326
+ Series = rep(c("A", "B"), each = 50)
2327
+ )
2328
+
2329
+ p <- ggplot(df, aes(x = Time, y = Value, color = Series)) +
2330
+ geom_line(linewidth = 0.8) +
2331
+ geom_point(size = 1, alpha = 0.5) +
2332
+ geom_ribbon(aes(ymin = Value - 1, ymax = Value + 1, fill = Series), alpha = 0.15, color = NA) +
2333
+ scale_color_manual(values = c("steelblue", "coral")) +
2334
+ scale_fill_manual(values = c("steelblue", "coral")) +
2335
+ labs(title = "${title}", x = "Time", y = "Value") +
2336
+ theme_pub
2337
+
2338
+ ggsave("line_plot.pdf", p, width = 6, height = 4)
2339
+ print(p)`;
2340
+ }
2341
+ else if (chartType === 'bar') {
2342
+ code = `${rPreamble}
2343
+ df <- data.frame(
2344
+ Category = c("Cat A", "Cat B", "Cat C", "Cat D", "Cat E"),
2345
+ Value = c(23, 45, 56, 78, 32),
2346
+ SE = c(3, 5, 4, 6, 3)
2347
+ )
2348
+
2349
+ p <- ggplot(df, aes(x = reorder(Category, -Value), y = Value)) +
2350
+ geom_col(fill = "steelblue", alpha = 0.85, width = 0.65) +
2351
+ geom_errorbar(aes(ymin = Value - SE, ymax = Value + SE), width = 0.2) +
2352
+ geom_text(aes(label = Value), vjust = -0.5, size = 3.5) +
2353
+ labs(title = "${title}", x = "", y = "Value") +
2354
+ theme_pub
2355
+
2356
+ ggsave("bar_plot.pdf", p, width = 6, height = 4)
2357
+ print(p)`;
2358
+ }
2359
+ else if (chartType === 'kaplan_meier') {
2360
+ code = `${rPreamble}
2361
+ library(survival)
2362
+ library(survminer)
2363
+
2364
+ df <- data.frame(
2365
+ time = c(6, 6, 6, 7, 10, 13, 16, 22, 23, 6, 9, 10, 11, 17, 19, 20, 25, 32, 32, 34,
2366
+ 1, 1, 2, 2, 3, 4, 4, 5, 5, 8, 8, 8, 8, 11, 11, 12, 12, 15, 17, 22),
2367
+ event = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2368
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0),
2369
+ group = c(rep("Treatment", 20), rep("Control", 20))
2370
+ )
2371
+
2372
+ fit <- survfit(Surv(time, event) ~ group, data = df)
2373
+ p <- ggsurvplot(fit, data = df, pval = TRUE, conf.int = TRUE,
2374
+ risk.table = TRUE, palette = c("steelblue", "coral"),
2375
+ title = "${title}",
2376
+ xlab = "Time", ylab = "Survival Probability",
2377
+ ggtheme = theme_bw() + theme(text = element_text(family = "serif")))
2378
+
2379
+ ggsave("km_plot.pdf", plot = print(p), width = 8, height = 6)`;
2380
+ }
2381
+ else if (chartType === 'forest') {
2382
+ code = `${rPreamble}
2383
+ library(forestplot)
2384
+
2385
+ df <- data.frame(
2386
+ study = c("Study A", "Study B", "Study C", "Study D", "Study E", "Overall"),
2387
+ estimate = c(0.85, 1.12, 0.72, 0.95, 1.05, 0.92),
2388
+ lower = c(0.65, 0.88, 0.55, 0.75, 0.82, 0.80),
2389
+ upper = c(1.10, 1.42, 0.94, 1.20, 1.34, 1.06)
2390
+ )
2391
+
2392
+ p <- ggplot(df, aes(y = reorder(study, desc(row_number())), x = estimate, xmin = lower, xmax = upper)) +
2393
+ geom_pointrange(aes(color = study == "Overall"), size = 0.8) +
2394
+ geom_vline(xintercept = 1, linetype = "dashed", color = "gray50") +
2395
+ scale_color_manual(values = c("steelblue", "darkred"), guide = "none") +
2396
+ labs(title = "${title}", x = "Effect Size (OR / HR)", y = "") +
2397
+ theme_pub
2398
+
2399
+ ggsave("forest_plot.pdf", p, width = 8, height = 5)
2400
+ print(p)`;
2401
+ }
2402
+ else if (chartType === 'volcano') {
2403
+ code = `${rPreamble}
2404
+ set.seed(42)
2405
+ df <- data.frame(
2406
+ log2fc = rnorm(5000) * 1.5,
2407
+ pvalue = 10^(-abs(rnorm(5000) * 2))
2408
+ ) %>%
2409
+ mutate(
2410
+ neg_log10p = -log10(pvalue),
2411
+ category = case_when(
2412
+ abs(log2fc) > 1 & pvalue < 0.05 & log2fc > 0 ~ "Up",
2413
+ abs(log2fc) > 1 & pvalue < 0.05 & log2fc < 0 ~ "Down",
2414
+ TRUE ~ "NS"
2415
+ )
2416
+ )
2417
+
2418
+ p <- ggplot(df, aes(x = log2fc, y = neg_log10p, color = category)) +
2419
+ geom_point(alpha = 0.5, size = 1) +
2420
+ scale_color_manual(values = c("Down" = "blue", "NS" = "gray70", "Up" = "red")) +
2421
+ geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "gray50") +
2422
+ geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "gray50") +
2423
+ labs(title = "${title}", x = "log2 Fold Change", y = "-log10(p-value)", color = "") +
2424
+ theme_pub
2425
+
2426
+ ggsave("volcano_plot.pdf", p, width = 6, height = 5)
2427
+ print(p)`;
2428
+ }
2429
+ else if (chartType === 'qq') {
2430
+ code = `${rPreamble}
2431
+ set.seed(42)
2432
+ data <- rnorm(200) * 2 + 5 # Replace with your data
2433
+
2434
+ df <- data.frame(value = data)
2435
+
2436
+ p <- ggplot(df, aes(sample = value)) +
2437
+ stat_qq(color = "steelblue", alpha = 0.7, size = 2) +
2438
+ stat_qq_line(color = "red", linewidth = 1) +
2439
+ labs(title = "${title}", x = "Theoretical Quantiles", y = "Sample Quantiles") +
2440
+ theme_pub
2441
+
2442
+ shapiro <- shapiro.test(data)
2443
+ p <- p + annotate("text", x = -Inf, y = Inf, hjust = -0.1, vjust = 1.5,
2444
+ label = paste0("Shapiro-Wilk p = ", format.pval(shapiro$p.value, digits = 4)),
2445
+ size = 3.5)
2446
+
2447
+ ggsave("qq_plot.pdf", p, width = 6, height = 4)
2448
+ print(p)`;
2449
+ }
2450
+ else {
2451
+ return `**Error**: Unknown chart_type "${chartType}". Supported: scatter, histogram, heatmap, boxplot, violin, line, bar, kaplan_meier, forest, volcano, qq.`;
2452
+ }
2453
+ }
2454
+ let out = `## Visualization Code — ${chartType} (${language === 'python' ? 'Python' : 'R'})\n\n`;
2455
+ out += `**Style:** ${isPub ? 'Publication' : 'Presentation'} | **Data:** ${dataDesc}\n\n`;
2456
+ out += '```' + (language === 'python' ? 'python' : 'r') + '\n';
2457
+ out += code + '\n';
2458
+ out += '```\n';
2459
+ out += `\n*Copy this code and replace the sample data with your actual data. The plot will be saved to the working directory.*\n`;
2460
+ return out;
2461
+ },
2462
+ });
2463
+ }
2464
+ //# sourceMappingURL=lab-data.js.map