@fuzdev/fuz_util 0.42.0 → 0.44.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +19 -12
  3. package/dist/async.d.ts +2 -2
  4. package/dist/async.d.ts.map +1 -1
  5. package/dist/async.js +2 -2
  6. package/dist/benchmark.d.ts +179 -0
  7. package/dist/benchmark.d.ts.map +1 -0
  8. package/dist/benchmark.js +400 -0
  9. package/dist/benchmark_baseline.d.ts +195 -0
  10. package/dist/benchmark_baseline.d.ts.map +1 -0
  11. package/dist/benchmark_baseline.js +388 -0
  12. package/dist/benchmark_format.d.ts +87 -0
  13. package/dist/benchmark_format.d.ts.map +1 -0
  14. package/dist/benchmark_format.js +266 -0
  15. package/dist/benchmark_stats.d.ts +112 -0
  16. package/dist/benchmark_stats.d.ts.map +1 -0
  17. package/dist/benchmark_stats.js +219 -0
  18. package/dist/benchmark_types.d.ts +174 -0
  19. package/dist/benchmark_types.d.ts.map +1 -0
  20. package/dist/benchmark_types.js +1 -0
  21. package/dist/git.d.ts +12 -0
  22. package/dist/git.d.ts.map +1 -1
  23. package/dist/git.js +14 -0
  24. package/dist/library_json.d.ts +3 -3
  25. package/dist/library_json.d.ts.map +1 -1
  26. package/dist/library_json.js +1 -1
  27. package/dist/maths.d.ts +4 -0
  28. package/dist/maths.d.ts.map +1 -1
  29. package/dist/maths.js +8 -0
  30. package/dist/object.js +1 -1
  31. package/dist/source_json.d.ts +4 -4
  32. package/dist/stats.d.ts +180 -0
  33. package/dist/stats.d.ts.map +1 -0
  34. package/dist/stats.js +402 -0
  35. package/dist/string.d.ts +13 -0
  36. package/dist/string.d.ts.map +1 -1
  37. package/dist/string.js +58 -0
  38. package/dist/time.d.ts +165 -0
  39. package/dist/time.d.ts.map +1 -0
  40. package/dist/time.js +264 -0
  41. package/dist/timings.d.ts +1 -7
  42. package/dist/timings.d.ts.map +1 -1
  43. package/dist/timings.js +16 -16
  44. package/package.json +21 -19
  45. package/src/lib/async.ts +3 -3
  46. package/src/lib/benchmark.ts +498 -0
  47. package/src/lib/benchmark_baseline.ts +538 -0
  48. package/src/lib/benchmark_format.ts +314 -0
  49. package/src/lib/benchmark_stats.ts +311 -0
  50. package/src/lib/benchmark_types.ts +197 -0
  51. package/src/lib/git.ts +24 -0
  52. package/src/lib/library_json.ts +3 -3
  53. package/src/lib/maths.ts +8 -0
  54. package/src/lib/object.ts +1 -1
  55. package/src/lib/stats.ts +534 -0
  56. package/src/lib/string.ts +66 -0
  57. package/src/lib/time.ts +319 -0
  58. package/src/lib/timings.ts +17 -17
  59. package/src/lib/types.ts +2 -2
@@ -0,0 +1,534 @@
1
+ /**
2
+ * Statistical analysis utilities.
3
+ * Pure functions with zero dependencies - can be used standalone for any data analysis.
4
+ */
5
+
6
// Default tuning values used when callers omit the corresponding option.

// IQR outlier detection
/** Multiplier applied to the interquartile range when building the outlier bounds. */
const DEFAULT_IQR_MULTIPLIER = 1.5;

// MAD outlier detection
/** Modified z-score above which a value is treated as an outlier. */
const DEFAULT_MAD_Z_SCORE_THRESHOLD = 3.5;
/** Stricter modified z-score used when the first pass flags too many values. */
const DEFAULT_MAD_Z_SCORE_EXTREME = 5.0;
/** Scale factor of the modified z-score (normal distribution approximation). */
const DEFAULT_MAD_CONSTANT = 0.6745;
/** Outlier ratio above which the stricter z-score is tried. */
const DEFAULT_OUTLIER_RATIO_HIGH = 0.3;
/** Outlier ratio above which only the values closest to the median are kept. */
const DEFAULT_OUTLIER_RATIO_EXTREME = 0.4;
/** Fraction of values kept in the keep-closest mode. */
const DEFAULT_OUTLIER_KEEP_RATIO = 0.8;

// Shared
/** z-score for a 95% confidence interval. */
const DEFAULT_CONFIDENCE_Z = 1.96;
/** Smallest sample size for which outlier detection is attempted. */
const DEFAULT_MIN_SAMPLE_SIZE = 3;
16
+
17
/**
 * Computes the arithmetic mean of `values`.
 * @param values - numbers to average
 * @returns the sum divided by the count, or `NaN` when `values` is empty
 */
export const stats_mean = (values: Array<number>): number => {
	const count = values.length;
	if (count === 0) return NaN;
	let total = 0;
	for (const value of values) {
		total += value;
	}
	return total / count;
};
24
+
25
/**
 * Computes the median of `values` without mutating the input.
 * For an even count the two middle values are averaged.
 * @param values - numbers to inspect
 * @returns the median, or `NaN` when `values` is empty
 */
export const stats_median = (values: Array<number>): number => {
	const count = values.length;
	if (count === 0) return NaN;
	// sort a copy so the caller's array keeps its order
	const ordered = values.slice().sort((x, y) => x - y);
	const upper_middle = Math.floor(count / 2);
	if (count % 2 !== 0) {
		return ordered[upper_middle]!;
	}
	return (ordered[upper_middle - 1]! + ordered[upper_middle]!) / 2;
};
34
+
35
/**
 * Computes the population standard deviation of `values`:
 * the squared deviations are averaged over `n` (not `n - 1`) before taking the root.
 * @param values - numbers to measure
 * @param mean - optional precomputed mean; computed with `stats_mean` when omitted
 * @returns the standard deviation, or `NaN` when `values` is empty
 */
export const stats_std_dev = (values: Array<number>, mean?: number): number => {
	const count = values.length;
	if (count === 0) return NaN;
	const center = mean ?? stats_mean(values);
	let squared_total = 0;
	for (const value of values) {
		squared_total += (value - center) ** 2;
	}
	return Math.sqrt(squared_total / count);
};
46
+
47
/**
 * Computes the population variance of `values`
 * (mean of the squared deviations, dividing by `n`).
 * @param values - numbers to measure
 * @param mean - optional precomputed mean; computed with `stats_mean` when omitted
 * @returns the variance, or `NaN` when `values` is empty
 */
export const stats_variance = (values: Array<number>, mean?: number): number => {
	const count = values.length;
	if (count === 0) return NaN;
	const center = mean ?? stats_mean(values);
	let squared_total = 0;
	for (const value of values) {
		squared_total += (value - center) ** 2;
	}
	return squared_total / count;
};
55
+
56
/**
 * Calculate a percentile of an array of numbers using linear interpolation.
 * Uses the "R-7" method (default in R, NumPy, Excel), which interpolates between
 * the two nearest order statistics at fractional index `(n - 1) * p`.
 * The input array is not mutated.
 * @param values - Array of numbers
 * @param p - Percentile as a fraction in [0, 1] (e.g., 0.95 for the 95th percentile)
 * @returns the interpolated percentile, or `NaN` when `values` is empty
 * or `p` is `NaN` or outside [0, 1]
 */
export const stats_percentile = (values: Array<number>, p: number): number => {
	if (values.length === 0) return NaN;
	// An out-of-range (or NaN) `p` would index past the ends of the sorted array
	// and leak `undefined` through the `number` return type, so reject it up front.
	if (!(p >= 0 && p <= 1)) return NaN;
	if (values.length === 1) return values[0]!;

	const sorted = [...values].sort((a, b) => a - b);

	// R-7 method: fractional index into the sorted values
	const index = (sorted.length - 1) * p;
	const lower = Math.floor(index);
	const upper = Math.ceil(index);

	// Exact hit on an order statistic, no interpolation needed
	if (lower === upper) {
		return sorted[lower]!;
	}

	// Linear interpolation between the two nearest values
	const fraction = index - lower;
	return sorted[lower]! + fraction * (sorted[upper]! - sorted[lower]!);
};
83
+
84
/**
 * Computes the coefficient of variation: the ratio `std_dev / mean`.
 * Lets the relative spread of differently scaled datasets be compared.
 * @param mean - mean of the data
 * @param std_dev - standard deviation of the data
 * @returns the ratio, or `NaN` when `mean` is exactly 0
 */
export const stats_cv = (mean: number, std_dev: number): number =>
	mean === 0 ? NaN : std_dev / mean;
93
+
94
/**
 * Finds the smallest and largest value in a single pass.
 * @param values - numbers to scan
 * @returns `{min, max}`, both `NaN` when `values` is empty
 */
export const stats_min_max = (values: Array<number>): {min: number; max: number} => {
	if (values.length === 0) return {min: NaN, max: NaN};
	// seed both extremes with the first element, then let later values replace them
	const extremes = {min: values[0]!, max: values[0]!};
	for (const value of values) {
		if (value < extremes.min) extremes.min = value;
		if (value > extremes.max) extremes.max = value;
	}
	return extremes;
};
108
+
109
/**
 * Outcome of an outlier detection pass.
 */
export interface StatsOutlierResult {
	/** Values that were kept */
	cleaned: Array<number>;
	/** Values that were classified as outliers */
	outliers: Array<number>;
}

/**
 * Options accepted by `stats_outliers_iqr`.
 */
export interface StatsOutliersIqrOptions {
	/** Multiplier for the IQR bounds (default: 1.5) */
	iqr_multiplier?: number;
	/** Minimum sample size to perform outlier detection (default: 3) */
	min_sample_size?: number;
}

/**
 * Splits `values` into kept values and outliers using the interquartile range.
 * A value is an outlier when it lies outside
 * `[Q1 - multiplier * IQR, Q3 + multiplier * IQR]`, where Q1 and Q3 are read from
 * the sorted copy at indices `floor(n * 0.25)` and `floor(n * 0.75)`.
 *
 * When there are fewer than `min_sample_size` values, or the IQR is 0,
 * nothing is removed and `cleaned` is the input array itself (not a copy).
 *
 * @param values - numbers to classify
 * @param options - bounds multiplier and minimum sample size
 * @returns the kept values and the outliers, both in input order
 */
export const stats_outliers_iqr = (
	values: Array<number>,
	options?: StatsOutliersIqrOptions,
): StatsOutlierResult => {
	const bound_factor = options?.iqr_multiplier ?? DEFAULT_IQR_MULTIPLIER;
	const required_count = options?.min_sample_size ?? DEFAULT_MIN_SAMPLE_SIZE;

	const unchanged = (): StatsOutlierResult => ({cleaned: values, outliers: []});

	if (values.length < required_count) return unchanged();

	const ordered = values.slice().sort((x, y) => x - y);
	const q1 = ordered[Math.floor(ordered.length * 0.25)]!;
	const q3 = ordered[Math.floor(ordered.length * 0.75)]!;
	const spread = q3 - q1;

	// with no spread every differing value would be flagged, so skip detection
	if (spread === 0) return unchanged();

	const lowest_allowed = q1 - bound_factor * spread;
	const highest_allowed = q3 + bound_factor * spread;

	const result: StatsOutlierResult = {cleaned: [], outliers: []};
	for (const value of values) {
		const is_outlier = value < lowest_allowed || value > highest_allowed;
		(is_outlier ? result.outliers : result.cleaned).push(value);
	}
	return result;
};
169
+
170
/**
 * Options accepted by `stats_outliers_mad`.
 */
export interface StatsOutliersMadOptions {
	/** Modified Z-score threshold for outlier detection (default: 3.5) */
	z_score_threshold?: number;
	/** Extreme Z-score threshold when too many outliers detected (default: 5.0) */
	z_score_extreme?: number;
	/** MAD constant for normal distribution (default: 0.6745) */
	mad_constant?: number;
	/** Ratio threshold to switch to extreme mode (default: 0.3) */
	outlier_ratio_high?: number;
	/** Ratio threshold to switch to keep-closest mode (default: 0.4) */
	outlier_ratio_extreme?: number;
	/** Ratio of values to keep in keep-closest mode (default: 0.8) */
	outlier_keep_ratio?: number;
	/** Minimum sample size to perform outlier detection (default: 3) */
	min_sample_size?: number;
	/** Options to pass to IQR fallback when MAD is zero */
	iqr_options?: StatsOutliersIqrOptions;
}

/**
 * Splits `values` into kept values and outliers using the MAD
 * (Median Absolute Deviation) and the modified z-score
 * `mad_constant * (x - median) / MAD`.
 *
 * The detection escalates in up to three passes:
 * 1. values whose absolute modified z-score exceeds `z_score_threshold` are outliers;
 * 2. if more than `outlier_ratio_high` of the values were flagged, the pass is
 *    repeated with `z_score_extreme`;
 * 3. if more than `outlier_ratio_extreme` are still flagged, the
 *    `floor(n * outlier_keep_ratio)` values closest to the median are kept and
 *    the rest are reported as outliers (both ordered by distance from the median).
 *
 * When there are fewer than `min_sample_size` values nothing is removed and
 * `cleaned` is the input array itself. When the MAD is 0 the result of
 * `stats_outliers_iqr(values, iqr_options)` is returned instead.
 *
 * @param values - numbers to classify
 * @param options - thresholds and ratios, see `StatsOutliersMadOptions`
 */
export const stats_outliers_mad = (
	values: Array<number>,
	options?: StatsOutliersMadOptions,
): StatsOutlierResult => {
	const first_limit = options?.z_score_threshold ?? DEFAULT_MAD_Z_SCORE_THRESHOLD;
	const second_limit = options?.z_score_extreme ?? DEFAULT_MAD_Z_SCORE_EXTREME;
	const scale = options?.mad_constant ?? DEFAULT_MAD_CONSTANT;
	const ratio_high = options?.outlier_ratio_high ?? DEFAULT_OUTLIER_RATIO_HIGH;
	const ratio_extreme = options?.outlier_ratio_extreme ?? DEFAULT_OUTLIER_RATIO_EXTREME;
	const keep_ratio = options?.outlier_keep_ratio ?? DEFAULT_OUTLIER_KEEP_RATIO;
	const required_count = options?.min_sample_size ?? DEFAULT_MIN_SAMPLE_SIZE;

	if (values.length < required_count) {
		return {cleaned: values, outliers: []};
	}

	const ordered = values.slice().sort((x, y) => x - y);
	const center = stats_median(ordered);

	// MAD: median of the absolute distances from the median
	const distances = values.map((value) => Math.abs(value - center));
	const mad = stats_median(distances.slice().sort((x, y) => x - y));

	// the modified z-score is undefined for a zero MAD, so use the IQR method
	if (mad === 0) {
		return stats_outliers_iqr(values, options?.iqr_options);
	}

	// classify every value against one modified z-score limit, keeping input order
	const split_by_limit = (limit: number): StatsOutlierResult => {
		const kept: Array<number> = [];
		const rejected: Array<number> = [];
		for (const value of values) {
			const modified_z = (scale * (value - center)) / mad;
			(Math.abs(modified_z) > limit ? rejected : kept).push(value);
		}
		return {cleaned: kept, outliers: rejected};
	};

	const first_pass = split_by_limit(first_limit);
	if (!(first_pass.outliers.length > values.length * ratio_high)) {
		return first_pass;
	}

	// too many outliers: retry with the stricter limit
	const second_pass = split_by_limit(second_limit);
	if (!(second_pass.outliers.length > values.length * ratio_extreme)) {
		return second_pass;
	}

	// still too many: keep only the values closest to the median
	const ranked = values.map((value) => ({value, distance: Math.abs(value - center)}));
	ranked.sort((x, y) => x.distance - y.distance);
	const keep_count = Math.floor(values.length * keep_ratio);
	return {
		cleaned: ranked.slice(0, keep_count).map((entry) => entry.value),
		outliers: ranked.slice(keep_count).map((entry) => entry.value),
	};
};
271
+
272
/**
 * Lookup table of z-scores for commonly used confidence levels.
 */
export const STATS_CONFIDENCE_Z_SCORES: Record<number, number> = {
	0.8: 1.282,
	0.9: 1.645,
	0.95: 1.96,
	0.99: 2.576,
	0.999: 3.291,
};

/**
 * Converts a two-sided confidence level (exclusive range 0-1) to a z-score.
 * Levels present in `STATS_CONFIDENCE_Z_SCORES` are returned from the table;
 * any other level is approximated.
 *
 * @example
 * ```ts
 * stats_confidence_level_to_z_score(0.95); // 1.96
 * stats_confidence_level_to_z_score(0.99); // 2.576
 * ```
 *
 * @param level - confidence level, strictly between 0 and 1
 * @returns the z-score `z` such that P(-z < Z < z) is approximately `level`
 * @throws Error when `level` is `<= 0` or `>= 1`
 */
export const stats_confidence_level_to_z_score = (level: number): number => {
	const out_of_range = level <= 0 || level >= 1;
	if (out_of_range) {
		throw new Error('Confidence level must be between 0 and 1 (exclusive)');
	}

	// Exact table hit
	if (level in STATS_CONFIDENCE_Z_SCORES) {
		return STATS_CONFIDENCE_Z_SCORES[level]!;
	}

	// For confidence level c we want z with P(-z < Z < z) = c,
	// i.e. Φ(z) = (1 + c) / 2, so z = Φ⁻¹((1 + c) / 2),
	// and Φ⁻¹(p) = √2 * erfinv(2p - 1).
	const cumulative = (1 + level) / 2; // e.g., 0.95 -> 0.975
	const erfinv_arg = 2 * cumulative - 1; // e.g., 0.975 -> 0.95

	// Winitzki approximation for erfinv
	const winitzki_a = 0.147;
	const log_complement = Math.log(1 - erfinv_arg * erfinv_arg);
	const half_sum = 2 / (Math.PI * winitzki_a) + log_complement / 2;
	const inner_root = Math.sqrt(half_sum * half_sum - log_complement / winitzki_a);
	const erfinv = Math.sign(erfinv_arg) * Math.sqrt(inner_root - half_sum);

	return Math.SQRT2 * erfinv;
};
317
+
318
/**
 * Options for the confidence interval helpers.
 */
export interface StatsConfidenceIntervalOptions {
	/** Z-score for confidence level (default: 1.96 for 95% CI) */
	z_score?: number;
	/** Confidence level (0-1), alternative to z_score. If both provided, z_score takes precedence. */
	confidence_level?: number;
}

/**
 * Computes a confidence interval for the mean of `values`.
 * The mean and population standard deviation are derived from the data and
 * handed to `stats_confidence_interval_from_summary`.
 * @param values - Array of numbers
 * @param options - z-score or confidence level to use
 * @returns `[lower_bound, upper_bound]`, or `[NaN, NaN]` when `values` is empty
 */
export const stats_confidence_interval = (
	values: Array<number>,
	options?: StatsConfidenceIntervalOptions,
): [number, number] => {
	const sample_size = values.length;
	if (sample_size === 0) return [NaN, NaN];

	const average = stats_mean(values);
	// reuse the mean so the deviation pass does not recompute it
	const spread = stats_std_dev(values, average);

	return stats_confidence_interval_from_summary(average, spread, sample_size, options);
};
345
+
346
/**
 * Calculate confidence interval from summary statistics (mean, std_dev, sample_size).
 * Useful when raw data is not available.
 *
 * The interval is `mean ± z * std_dev / sqrt(sample_size)`. The z-score is
 * resolved in this order: `options.z_score`, then `options.confidence_level`
 * (converted with `stats_confidence_level_to_z_score`), then the 95% default.
 *
 * @param mean - Mean of the data
 * @param std_dev - Standard deviation of the data
 * @param sample_size - Number of samples
 * @param options - Configuration options
 * @returns [lower_bound, upper_bound], or `[NaN, NaN]` when `sample_size` is 0
 * @throws Error when `confidence_level` is used and is not strictly between 0 and 1
 */
export const stats_confidence_interval_from_summary = (
	mean: number,
	std_dev: number,
	sample_size: number,
	options?: StatsConfidenceIntervalOptions,
): [number, number] => {
	// z_score takes precedence, then confidence_level, then default.
	// Presence is tested with a nullish check rather than truthiness, so an
	// explicit `confidence_level: 0` is validated (and rejected) instead of
	// silently falling back to the 95% default.
	const confidence_level = options?.confidence_level;
	const z_score =
		options?.z_score ??
		(confidence_level != null
			? stats_confidence_level_to_z_score(confidence_level)
			: DEFAULT_CONFIDENCE_Z);

	if (sample_size === 0) return [NaN, NaN];

	// standard error of the mean
	const se = std_dev / Math.sqrt(sample_size);
	const margin = z_score * se;

	return [mean - margin, mean + margin];
};
376
+
377
+ // Hypothesis Testing Utilities
378
+ // These functions support statistical significance testing (t-tests, p-values, etc.)
379
+
380
/**
 * Values produced by `stats_welch_t_test`.
 */
export interface StatsWelchTTestResult {
	/** The t-statistic */
	t_statistic: number;
	/** Welch-Satterthwaite degrees of freedom */
	degrees_of_freedom: number;
}

/**
 * Computes Welch's t-statistic and the Welch-Satterthwaite degrees of freedom
 * from the summary statistics of two samples. Unlike Student's t-test, the two
 * samples are not assumed to share a variance.
 *
 * No input validation is performed: sample sizes of 0 or 1, or two zero
 * standard deviations, lead to divisions by zero in the formulas below.
 *
 * @param mean1 - Mean of first sample
 * @param std1 - Standard deviation of first sample
 * @param n1 - Size of first sample
 * @param mean2 - Mean of second sample
 * @param std2 - Standard deviation of second sample
 * @param n2 - Size of second sample
 */
export const stats_welch_t_test = (
	mean1: number,
	std1: number,
	n1: number,
	mean2: number,
	std2: number,
	n2: number,
): StatsWelchTTestResult => {
	// per-sample variance of the mean: s² / n
	const mean_variance1 = std1 ** 2 / n1;
	const mean_variance2 = std2 ** 2 / n2;
	const combined = mean_variance1 + mean_variance2;

	const t_statistic = (mean1 - mean2) / Math.sqrt(combined);

	// Welch-Satterthwaite: (v1 + v2)² / (v1² / (n1 - 1) + v2² / (n2 - 1))
	const weighted = mean_variance1 ** 2 / (n1 - 1) + mean_variance2 ** 2 / (n2 - 1);
	const degrees_of_freedom = combined ** 2 / weighted;

	return {t_statistic, degrees_of_freedom};
};
424
+
425
/**
 * Standard normal CDF approximation
 * (Abramowitz and Stegun formula 26.2.17, a five-term polynomial in
 * `t = 1 / (1 + 0.2316419 * |x|)`).
 * @param x - point at which to evaluate the CDF
 * @returns approximate P(Z <= x) for a standard normal Z
 */
export const stats_normal_cdf = (x: number): number => {
	const t = 1 / (1 + 0.2316419 * Math.abs(x));
	// standard normal density at x (0.3989423 ≈ 1 / sqrt(2π))
	const d = 0.3989423 * Math.exp((-x * x) / 2);
	// upper-tail probability for |x|
	const p =
		d * t * (0.3193815 + t * (-0.3565638 + t * (1.781478 + t * (-1.821256 + t * 1.330274))));
	return x > 0 ? 1 - p : p;
};

/**
 * Log gamma function approximation (Lanczos approximation, g = 7, 9 coefficients).
 * Arguments below 0.5 are handled with the reflection formula.
 * @param z - argument of the gamma function
 * @returns approximate ln(Γ(z))
 */
export const stats_ln_gamma = (z: number): number => {
	const g = 7;
	const c = [
		0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313,
		-176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6,
		1.5056327351493116e-7,
	];

	if (z < 0.5) {
		// reflection: Γ(z) Γ(1 - z) = π / sin(πz)
		return Math.log(Math.PI / Math.sin(Math.PI * z)) - stats_ln_gamma(1 - z);
	}

	const z_adj = z - 1;
	let x = c[0]!;
	for (let i = 1; i < g + 2; i++) {
		x += c[i]! / (z_adj + i);
	}
	const t = z_adj + g + 0.5;
	return 0.5 * Math.log(2 * Math.PI) + (z_adj + 0.5) * Math.log(t) - t + Math.log(x);
};

/**
 * Regularized incomplete beta function I_x(a, b), evaluated with the standard
 * continued fraction expansion (modified Lentz's method).
 *
 * @param x - upper integration limit; values `<= 0` return 0 and `>= 1` return 1
 * @param a - first shape parameter
 * @param b - second shape parameter
 * @returns approximate I_x(a, b)
 */
export const stats_incomplete_beta = (x: number, a: number, b: number): number => {
	if (x <= 0) return 0;
	if (x >= 1) return 1;

	// The continued fraction converges quickly only for x < (a + 1) / (a + b + 2);
	// beyond that use the symmetry I_x(a, b) = 1 - I_{1-x}(b, a).
	if (x > (a + 1) / (a + b + 2)) {
		return 1 - stats_incomplete_beta(1 - x, b, a);
	}

	// Prefactor x^a (1 - x)^b / (a * B(a, b)), computed in log space
	const ln_beta = stats_ln_gamma(a) + stats_ln_gamma(b) - stats_ln_gamma(a + b);
	const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - ln_beta) / a;

	const tiny = 1e-30;

	// Modified Lentz's method. The fraction is 1 / (1 + d1 / (1 + d2 / (1 + ...))),
	// so the first coefficient d1 = -(a + b) x / (a + 1) (the m = 0 "odd" term)
	// has to be folded in before the loop; omitting it yields wrong values.
	let c = 1;
	let d = 1 - ((a + b) * x) / (a + 1);
	if (Math.abs(d) < tiny) d = tiny;
	d = 1 / d;
	let f = d;

	for (let m = 1; m <= 100; m++) {
		const m2 = 2 * m;

		// Even step
		let aa = (m * (b - m) * x) / ((a + m2 - 1) * (a + m2));
		d = 1 + aa * d;
		if (Math.abs(d) < tiny) d = tiny;
		c = 1 + aa / c;
		if (Math.abs(c) < tiny) c = tiny;
		d = 1 / d;
		f *= d * c;

		// Odd step
		aa = (-(a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1));
		d = 1 + aa * d;
		if (Math.abs(d) < tiny) d = tiny;
		c = 1 + aa / c;
		if (Math.abs(c) < tiny) c = tiny;
		d = 1 / d;
		const delta = d * c;
		f *= delta;

		// converged once a full step no longer changes the product
		if (Math.abs(delta - 1) < 1e-8) break;
	}

	return front * f;
};

/**
 * Approximate two-tailed p-value from t-distribution.
 * For large df (>100), uses normal approximation.
 * For smaller df, uses the identity p = I_{df / (df + t²)}(df / 2, 1 / 2).
 *
 * @param t - t-statistic; only its magnitude matters, so the sign is ignored
 * @param df - Degrees of freedom
 * @returns Two-tailed p-value
 */
export const stats_t_distribution_p_value = (t: number, df: number): number => {
	// Use normal approximation for large df.
	// The magnitude is taken so a negative statistic cannot produce p > 1.
	if (df > 100) {
		return 2 * (1 - stats_normal_cdf(Math.abs(t)));
	}

	// For smaller df, use the incomplete beta function relationship
	const x = df / (df + t * t);
	const a = df / 2;
	const b = 0.5;

	return stats_incomplete_beta(x, a, b);
};
package/src/lib/string.ts CHANGED
@@ -97,3 +97,69 @@ export const strip_ansi = (str: string): string => str.replaceAll(/\x1B\[[0-9;]*
97
97
  */
98
98
  export const stringify = (value: unknown): string =>
99
99
  typeof value === 'bigint' ? value + 'n' : (JSON.stringify(value) ?? String(value)); // eslint-disable-line @typescript-eslint/no-unnecessary-condition
100
+
101
/**
 * Heuristic check for code points that occupy two terminal columns:
 * East Asian wide/fullwidth ranges plus the emoji and symbol ranges.
 */
const is_wide_code_point = (code: number): boolean =>
	(code >= 0x1100 && code <= 0x115f) || // Hangul Jamo initial consonants
	(code >= 0x2600 && code <= 0x27bf) || // misc symbols and dingbats
	(code >= 0x2e80 && code <= 0xa4cf && code !== 0x303f) || // CJK radicals, kana, ideographs, Yi
	(code >= 0xac00 && code <= 0xd7a3) || // Hangul syllables
	(code >= 0xf900 && code <= 0xfaff) || // CJK compatibility ideographs
	(code >= 0xfe30 && code <= 0xfe6f) || // CJK compatibility and small forms
	(code >= 0xff00 && code <= 0xff60) || // fullwidth forms
	(code >= 0xffe0 && code <= 0xffe6) || // fullwidth signs
	(code >= 0x16fe0 && code <= 0x1b2ff) || // Tangut, Khitan, Nushu, kana supplements
	(code >= 0x1f300 && code <= 0x1faff) || // emoji blocks
	(code >= 0x20000 && code <= 0x3fffd); // supplementary ideographic planes

/**
 * Calculate the display width of a string in terminal columns.
 * - Strips ANSI escape codes (they have 0 width)
 * - Emojis and other wide characters (CJK, kana, Hangul, fullwidth forms) take 2 columns
 * - Tab characters take 4 columns
 * - Newlines and other control characters take 0 columns
 * - Uses `Intl.Segmenter` to properly handle grapheme clusters (e.g., family emoji "👨‍👩‍👧‍👦")
 * - Combining marks do not add width (e.g., "e" + U+0301 is 1 column)
 *
 * The width classification is a heuristic, not a full East Asian Width table.
 *
 * @param str - text to measure, may contain ANSI escape codes
 * @returns the number of terminal columns the text is expected to occupy
 */
export const string_display_width = (str: string): number => {
	// Strip ANSI codes first (they have 0 display width)
	const clean = strip_ansi(str);

	// hoisted so the regexes are not rebuilt for every grapheme
	const combining_mark = /\p{M}/u;
	const emoji_presentation = /^\p{Emoji_Presentation}/u;

	let width = 0;
	const segmenter = new Intl.Segmenter();
	for (const {segment} of segmenter.segment(clean)) {
		const code = segment.codePointAt(0)!;

		// Handle control characters
		if (code === 0x09) {
			// Tab = 4 columns
			width += 4;
			continue;
		}
		if (code < 0x20 || (code >= 0x7f && code < 0xa0)) {
			// Other control characters (including newline) = 0 width
			continue;
		}

		// `segment.length` counts UTF-16 code units, so it is > 1 for any astral
		// code point and for base + combining-mark clusters. Only treat the
		// cluster as an emoji-style sequence when it has several non-mark code
		// points (ZWJ sequences, flags, skin tones) or carries the emoji
		// presentation selector U+FE0F.
		let is_sequence = false;
		if (segment.length > 1) {
			let base_count = 0;
			for (const char of segment) {
				if (!combining_mark.test(char)) base_count++;
			}
			is_sequence = base_count > 1 || segment.includes('\ufe0f');
		}

		if (is_sequence || is_wide_code_point(code) || emoji_presentation.test(segment)) {
			width += 2;
		} else {
			width += 1;
		}
	}
	return width;
};
149
+
150
/**
 * Pads `str` with spaces until it reaches `target_width` terminal columns,
 * measuring the current width with `string_display_width` so wide characters
 * are accounted for. Strings already at or beyond the target are returned unchanged.
 * @param str - text to pad
 * @param target_width - desired display width in columns
 * @param align - `'left'` appends the padding, `'right'` prepends it
 */
export const pad_width = (
	str: string,
	target_width: number,
	align: 'left' | 'right' = 'left',
): string => {
	const missing_columns = target_width - string_display_width(str);
	const filler = ' '.repeat(Math.max(0, missing_columns));
	return align === 'left' ? str + filler : filler + str;
};