@fuzdev/fuz_util 0.43.0 → 0.44.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,48 +1,7 @@
1
1
  import type {BenchmarkResult, BenchmarkGroup} from './benchmark_types.js';
2
- import {time_unit_detect_best, time_format, type TimeUnit} from './time.js';
3
-
4
- /**
5
- * Calculate the display width of a string in terminal columns.
6
- * Emojis and other wide characters take 2 columns.
7
- */
8
- const string_display_width = (str: string): number => {
9
- let width = 0;
10
- for (const char of str) {
11
- const code = char.codePointAt(0)!;
12
- // Emoji and other wide characters (rough heuristic)
13
- // - Most emoji are in range 0x1F300-0x1FAFF
14
- // - Some are in 0x2600-0x27BF (misc symbols)
15
- // - CJK characters 0x4E00-0x9FFF also double-width but not handling here
16
- if (
17
- (code >= 0x1f300 && code <= 0x1faff) ||
18
- (code >= 0x2600 && code <= 0x27bf) ||
19
- (code >= 0x1f600 && code <= 0x1f64f) ||
20
- (code >= 0x1f680 && code <= 0x1f6ff)
21
- ) {
22
- width += 2;
23
- } else {
24
- width += 1;
25
- }
26
- }
27
- return width;
28
- };
29
-
30
- /**
31
- * Pad a string to a target display width (accounting for wide characters).
32
- */
33
- const pad_to_width = (
34
- str: string,
35
- target_width: number,
36
- align: 'left' | 'right' = 'left',
37
- ): string => {
38
- const current_width = string_display_width(str);
39
- const padding = Math.max(0, target_width - current_width);
40
- if (align === 'left') {
41
- return str + ' '.repeat(padding);
42
- } else {
43
- return ' '.repeat(padding) + str;
44
- }
45
- };
2
+ import {time_unit_detect_best, time_format, TIME_UNIT_DISPLAY} from './time.js';
3
+ import {string_display_width, pad_width} from './string.js';
4
+ import {format_number} from './maths.js';
46
5
 
47
6
  /**
48
7
  * Format results as an ASCII table with percentiles, min/max, and relative performance.
@@ -53,19 +12,13 @@ const pad_to_width = (
53
12
  * @example
54
13
  * ```ts
55
14
  * console.log(benchmark_format_table(results));
56
- * // ┌────┬─────────────┬────────────┬────────────┬──────────┬──────────┬──────────┬──────────┬──────────┬──────────┬──────────┐
57
- * // │ Task Name │ ops/sec │ median(μs) │ p75 (μs) │ p90 (μs) │ p95 (μs) │ p99 (μs) │ min (μs) │ max (μs) │ vs Best │
58
- * // ├────┼─────────────┼────────────┼────────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤
59
- * // │ 🐇 │ slugify v2 │ 1,237,144 │ 0.81 │ 0.85 │ 0.89 │ 0.95 │ 1.20 │ 0.72 │ 2.45 │ baseline │
60
- * // │ 🐢 │ slugify │ 261,619 │ 3.82 │ 3.95 │ 4.12 │ 4.35 │ 5.10 │ 3.21 │ 12.45 │ 4.73x │
61
- * // └────┴─────────────┴────────────┴────────────┴──────────┴──────────┴──────────┴──────────┴──────────┴──────────┴──────────┘
15
+ * // ┌─────────────┬────────────┬────────────┬──────────┬──────────┬──────────┬──────────┬──────────┬──────────┬──────────┐
16
+ * // │ Task Name │ ops/sec │ median(μs) │ p75 (μs) │ p90 (μs) │ p95 (μs) │ p99 (μs) │ min (μs) │ max (μs) │ vs Best │
17
+ * // ├─────────────┼────────────┼────────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤
18
+ * // │ slugify v2 │ 1,237,144 │ 0.81 │ 0.85 │ 0.89 │ 0.95 │ 1.20 │ 0.72 │ 2.45 │ baseline │
19
+ * // │ slugify │ 261,619 │ 3.82 │ 3.95 │ 4.12 │ 4.35 │ 5.10 │ 3.21 │ 12.45 │ 4.73x │
20
+ * // └─────────────┴────────────┴────────────┴──────────┴──────────┴──────────┴──────────┴──────────┴──────────┴──────────┘
62
21
  * ```
63
- *
64
- * **Performance tier animals:**
65
- * - 🐆 Cheetah: >1M ops/sec (extremely fast)
66
- * - 🐇 Rabbit: >100K ops/sec (fast)
67
- * - 🐢 Turtle: >10K ops/sec (moderate)
68
- * - 🐌 Snail: <10K ops/sec (slow)
69
22
  */
70
23
  export const benchmark_format_table = (results: Array<BenchmarkResult>): string => {
71
24
  if (results.length === 0) return '(no results)';
@@ -73,7 +26,7 @@ export const benchmark_format_table = (results: Array<BenchmarkResult>): string
73
26
  // Detect best unit for all results
74
27
  const mean_times = results.map((r) => r.stats.mean_ns);
75
28
  const unit = time_unit_detect_best(mean_times);
76
- const unit_str = UNIT_LABELS[unit];
29
+ const unit_str = TIME_UNIT_DISPLAY[unit];
77
30
 
78
31
  // Find fastest for relative comparison
79
32
  const fastest_ops = Math.max(...results.map((r) => r.stats.ops_per_second));
@@ -82,7 +35,6 @@ export const benchmark_format_table = (results: Array<BenchmarkResult>): string
82
35
 
83
36
  // Header with unit
84
37
  rows.push([
85
- '',
86
38
  'Task Name',
87
39
  'ops/sec',
88
40
  `median (${unit_str})`,
@@ -97,7 +49,6 @@ export const benchmark_format_table = (results: Array<BenchmarkResult>): string
97
49
 
98
50
  // Data rows - all use same unit
99
51
  results.forEach((r) => {
100
- const tier = get_perf_tier(r.stats.ops_per_second);
101
52
  const ops_sec = benchmark_format_number(r.stats.ops_per_second, 2);
102
53
  const median = time_format(r.stats.median_ns, unit, 2).replace(unit_str, '').trim();
103
54
  const p75 = time_format(r.stats.p75_ns, unit, 2).replace(unit_str, '').trim();
@@ -111,7 +62,7 @@ export const benchmark_format_table = (results: Array<BenchmarkResult>): string
111
62
  const ratio = fastest_ops / r.stats.ops_per_second;
112
63
  const vs_best = ratio === 1.0 ? 'baseline' : `${ratio.toFixed(2)}x`;
113
64
 
114
- rows.push([tier, r.name, ops_sec, median, p75, p90, p95, p99, min, max, vs_best]);
65
+ rows.push([r.name, ops_sec, median, p75, p90, p95, p99, min, max, vs_best]);
115
66
  });
116
67
 
117
68
  // Calculate column widths (using display width for proper emoji handling)
@@ -126,7 +77,7 @@ export const benchmark_format_table = (results: Array<BenchmarkResult>): string
126
77
  lines.push('┌' + widths.map((w) => '─'.repeat(w + 2)).join('┬') + '┐');
127
78
 
128
79
  // Header
129
- const header = rows[0]!.map((cell, i) => ' ' + pad_to_width(cell, widths[i]!) + ' ').join('│');
80
+ const header = rows[0]!.map((cell, i) => ' ' + pad_width(cell, widths[i]!) + ' ').join('│');
130
81
  lines.push('│' + header + '│');
131
82
 
132
83
  // Header separator
@@ -136,11 +87,11 @@ export const benchmark_format_table = (results: Array<BenchmarkResult>): string
136
87
  for (let i = 1; i < rows.length; i++) {
137
88
  const row = rows[i]!.map((cell, col_i) => {
138
89
  const width = widths[col_i]!;
139
- // Left-align tier emoji and task name, right-align numbers
140
- if (col_i === 0 || col_i === 1) {
141
- return ' ' + pad_to_width(cell, width, 'left') + ' ';
90
+ // Left-align task name, right-align numbers
91
+ if (col_i === 0) {
92
+ return ' ' + pad_width(cell, width, 'left') + ' ';
142
93
  } else {
143
- return ' ' + pad_to_width(cell, width, 'right') + ' ';
94
+ return ' ' + pad_width(cell, width, 'right') + ' ';
144
95
  }
145
96
  }).join('│');
146
97
  lines.push('│' + row + '│');
@@ -173,7 +124,7 @@ export const benchmark_format_markdown = (results: Array<BenchmarkResult>): stri
173
124
  // Detect best unit for all results
174
125
  const mean_times = results.map((r) => r.stats.mean_ns);
175
126
  const unit = time_unit_detect_best(mean_times);
176
- const unit_str = UNIT_LABELS[unit];
127
+ const unit_str = TIME_UNIT_DISPLAY[unit];
177
128
 
178
129
  // Find fastest for relative comparison
179
130
  const fastest_ops = Math.max(...results.map((r) => r.stats.ops_per_second));
@@ -356,24 +307,8 @@ export const benchmark_format_table_grouped = (
356
307
  return sections.join('\n');
357
308
  };
358
309
 
359
- // TODO consider extracting to a general format utility module when more formatters are needed
360
310
  /**
361
311
  * Format a number with fixed decimal places and thousands separators.
312
+ * @see {@link format_number} in maths.ts for the underlying implementation.
362
313
  */
363
- export const benchmark_format_number = (n: number, decimals: number = 2): string => {
364
- if (!isFinite(n)) return String(n);
365
- return n.toFixed(decimals).replace(/\B(?=(\d{3})+(?!\d))/g, ',');
366
- };
367
-
368
- /**
369
- * Get performance tier symbol based on ops/sec.
370
- */
371
- const get_perf_tier = (ops_per_sec: number): string => {
372
- if (ops_per_sec >= 1_000_000) return '🐆'; // > 1M ops/sec (cheetah - extremely fast)
373
- if (ops_per_sec >= 100_000) return '🐇'; // > 100K ops/sec (rabbit - fast)
374
- if (ops_per_sec >= 10_000) return '🐢'; // > 10K ops/sec (turtle - moderate)
375
- return '🐌'; // < 10K ops/sec (snail - slow)
376
- };
377
-
378
- /** Unit labels for display (μs instead of us). */
379
- const UNIT_LABELS: Record<TimeUnit, string> = {ns: 'ns', us: 'μs', ms: 'ms', s: 's'};
314
+ export const benchmark_format_number = format_number;
@@ -14,6 +14,8 @@ import {
14
14
  stats_min_max,
15
15
  stats_confidence_interval,
16
16
  stats_outliers_mad,
17
+ stats_welch_t_test,
18
+ stats_t_distribution_p_value,
17
19
  } from './stats.js';
18
20
 
19
21
  /**
@@ -226,7 +228,7 @@ export const benchmark_stats_compare = (
226
228
  // When there's no variance, any difference is 100% reliable (p=0) or identical (p=1)
227
229
  p_value = a.mean_ns === b.mean_ns ? 1 : 0;
228
230
  } else {
229
- const {t_statistic, degrees_of_freedom} = welch_t_test(
231
+ const {t_statistic, degrees_of_freedom} = stats_welch_t_test(
230
232
  a.mean_ns,
231
233
  a.std_dev_ns,
232
234
  a.sample_size,
@@ -235,7 +237,7 @@ export const benchmark_stats_compare = (
235
237
  b.sample_size,
236
238
  );
237
239
  // Calculate two-tailed p-value using t-distribution approximation
238
- p_value = t_distribution_p_value(Math.abs(t_statistic), degrees_of_freedom);
240
+ p_value = stats_t_distribution_p_value(Math.abs(t_statistic), degrees_of_freedom);
239
241
  }
240
242
 
241
243
  // Cohen's d effect size
@@ -307,142 +309,3 @@ export const benchmark_stats_compare = (
307
309
  recommendation,
308
310
  };
309
311
  };
310
-
311
- /**
312
- * Calculate Welch's t-test statistic and degrees of freedom.
313
- * Welch's t-test is more robust than Student's t-test when variances are unequal.
314
- */
315
- const welch_t_test = (
316
- mean1: number,
317
- std1: number,
318
- n1: number,
319
- mean2: number,
320
- std2: number,
321
- n2: number,
322
- ): {t_statistic: number; degrees_of_freedom: number} => {
323
- const var1 = std1 ** 2;
324
- const var2 = std2 ** 2;
325
-
326
- const se1 = var1 / n1;
327
- const se2 = var2 / n2;
328
-
329
- const t_statistic = (mean1 - mean2) / Math.sqrt(se1 + se2);
330
-
331
- // Welch-Satterthwaite degrees of freedom
332
- const numerator = (se1 + se2) ** 2;
333
- const denominator = se1 ** 2 / (n1 - 1) + se2 ** 2 / (n2 - 1);
334
- const degrees_of_freedom = numerator / denominator;
335
-
336
- return {t_statistic, degrees_of_freedom};
337
- };
338
-
339
- /**
340
- * Approximate p-value from t-distribution using the approximation formula.
341
- * This avoids requiring a full t-distribution table or library.
342
- * For large df (>30), this approximation is very accurate.
343
- */
344
- const t_distribution_p_value = (t: number, df: number): number => {
345
- // Use normal approximation for large df
346
- if (df > 100) {
347
- // Standard normal CDF approximation
348
- return 2 * (1 - normal_cdf(t));
349
- }
350
-
351
- // For smaller df, use a more accurate approximation
352
- // Based on the incomplete beta function relationship
353
- const x = df / (df + t * t);
354
- const a = df / 2;
355
- const b = 0.5;
356
-
357
- // Approximation of regularized incomplete beta function
358
- // This is accurate to about 4 decimal places for typical use cases
359
- const beta_approx = incomplete_beta_approx(x, a, b);
360
- return beta_approx;
361
- };
362
-
363
- /**
364
- * Standard normal CDF approximation (Abramowitz and Stegun formula 7.1.26).
365
- */
366
- const normal_cdf = (x: number): number => {
367
- const t = 1 / (1 + 0.2316419 * Math.abs(x));
368
- const d = 0.3989423 * Math.exp((-x * x) / 2);
369
- const p =
370
- d * t * (0.3193815 + t * (-0.3565638 + t * (1.781478 + t * (-1.821256 + t * 1.330274))));
371
- return x > 0 ? 1 - p : p;
372
- };
373
-
374
- /**
375
- * Approximate regularized incomplete beta function for p-value calculation.
376
- * Uses continued fraction expansion for reasonable accuracy.
377
- */
378
- const incomplete_beta_approx = (x: number, a: number, b: number): number => {
379
- // Simple approximation using the relationship between beta and normal distributions
380
- // For our use case (t-distribution p-values), this provides sufficient accuracy
381
- if (x <= 0) return 0;
382
- if (x >= 1) return 1;
383
-
384
- // Use symmetry if needed
385
- if (x > (a + 1) / (a + b + 2)) {
386
- return 1 - incomplete_beta_approx(1 - x, b, a);
387
- }
388
-
389
- // Continued fraction approximation (first few terms)
390
- const lnBeta = ln_gamma(a) + ln_gamma(b) - ln_gamma(a + b);
391
- const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a;
392
-
393
- // Simple continued fraction (limited iterations for speed)
394
- let f = 1;
395
- let c = 1;
396
- let d = 0;
397
-
398
- for (let m = 1; m <= 100; m++) {
399
- const m2 = 2 * m;
400
-
401
- // Even step
402
- let aa = (m * (b - m) * x) / ((a + m2 - 1) * (a + m2));
403
- d = 1 + aa * d;
404
- if (Math.abs(d) < 1e-30) d = 1e-30;
405
- c = 1 + aa / c;
406
- if (Math.abs(c) < 1e-30) c = 1e-30;
407
- d = 1 / d;
408
- f *= d * c;
409
-
410
- // Odd step
411
- aa = (-(a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1));
412
- d = 1 + aa * d;
413
- if (Math.abs(d) < 1e-30) d = 1e-30;
414
- c = 1 + aa / c;
415
- if (Math.abs(c) < 1e-30) c = 1e-30;
416
- d = 1 / d;
417
- const delta = d * c;
418
- f *= delta;
419
-
420
- if (Math.abs(delta - 1) < 1e-8) break;
421
- }
422
-
423
- return front * f;
424
- };
425
-
426
- /**
427
- * Log gamma function approximation (Lanczos approximation).
428
- */
429
- const ln_gamma = (z: number): number => {
430
- const g = 7;
431
- const c = [
432
- 0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313,
433
- -176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6,
434
- 1.5056327351493116e-7,
435
- ];
436
-
437
- if (z < 0.5) {
438
- return Math.log(Math.PI / Math.sin(Math.PI * z)) - ln_gamma(1 - z);
439
- }
440
-
441
- const z_adj = z - 1;
442
- let x = c[0]!;
443
- for (let i = 1; i < g + 2; i++) {
444
- x += c[i]! / (z_adj + i);
445
- }
446
- const t = z_adj + g + 0.5;
447
- return 0.5 * Math.log(2 * Math.PI) + (z_adj + 0.5) * Math.log(t) - t + Math.log(x);
448
- };
package/src/lib/git.ts CHANGED
@@ -6,6 +6,30 @@ import type {Flavored} from './types.js';
6
6
  import {to_file_path} from './path.js';
7
7
  import {fs_exists} from './fs.js';
8
8
 
9
/**
 * Minimal description of a git checkout: the current commit and branch.
 * Either field is `null` when it could not be determined.
 */
export interface GitInfo {
	commit: string | null;
	branch: string | null;
}

/**
 * Reads the current commit hash (`git rev-parse HEAD`) and branch name
 * (`git rev-parse --abbrev-ref HEAD`) without throwing.
 *
 * Both commands are started concurrently. A rejected spawn is mapped to a
 * `null` stdout, and empty or whitespace-only output is also reported as `null`.
 *
 * @param options - spawn options forwarded unchanged to both `git` invocations
 * @returns the trimmed command outputs, or `null` for each value that is unavailable
 */
export const git_info_get = async (options?: SpawnOptions): Promise<GitInfo> => {
	// Shared runner: never rejects, a failure is represented as `{stdout: null}`.
	const rev_parse = (...args: Array<string>) =>
		spawn_out('git', ['rev-parse', ...args], options).catch(() => ({stdout: null}));

	const [head, ref] = await Promise.all([rev_parse('HEAD'), rev_parse('--abbrev-ref', 'HEAD')]);

	const commit = head.stdout?.trim() || null;
	const branch = ref.stdout?.trim() || null;
	return {commit, branch};
};
32
+
9
33
  export const GitOrigin = z.string();
10
34
  export type GitOrigin = Flavored<string, 'GitOrigin'>;
11
35
 
package/src/lib/maths.ts CHANGED
@@ -89,3 +89,11 @@ export const GR_9 = 76.01315561749645;
89
89
  * golden ratio/mean constants, `1/(GR**9)`, useful for scaling: https://wikipedia.org/wiki/Golden_ratio
90
90
  */
91
91
  export const GR_9i = 0.013155617496424835;
92
+
93
/**
 * Format a number with fixed decimal places and thousands separators.
 *
 * Only the integer part is grouped with commas; the fractional digits are
 * emitted exactly as `toFixed` produced them (so `decimals >= 4` never yields
 * separators after the decimal point).
 *
 * @param n - the number to format
 * @param decimals - number of digits after the decimal point, passed to `toFixed`
 * @returns the formatted string; non-finite values are returned as `String(n)`
 */
export const format_number = (n: number, decimals: number = 2): string => {
	if (!isFinite(n)) return String(n);

	const fixed = n.toFixed(decimals);
	// `toFixed` switches to exponential notation for magnitudes >= 1e21;
	// there is no integer part to group in that form, so return it untouched.
	if (fixed.includes('e')) return fixed;

	const dot = fixed.indexOf('.');
	const int_part = dot === -1 ? fixed : fixed.slice(0, dot);
	const frac_part = dot === -1 ? '' : fixed.slice(dot);

	// Insert a comma before every group of three digits counted from the right.
	return int_part.replace(/\B(?=(\d{3})+(?!\d))/g, ',') + frac_part;
};
package/src/lib/stats.ts CHANGED
@@ -272,7 +272,7 @@ export const stats_outliers_mad = (
272
272
  /**
273
273
  * Common z-scores for confidence intervals.
274
274
  */
275
- export const CONFIDENCE_Z_SCORES: Record<number, number> = {
275
+ export const STATS_CONFIDENCE_Z_SCORES: Record<number, number> = {
276
276
  0.8: 1.282,
277
277
  0.9: 1.645,
278
278
  0.95: 1.96,
@@ -286,18 +286,18 @@ export const CONFIDENCE_Z_SCORES: Record<number, number> = {
286
286
  *
287
287
  * @example
288
288
  * ```ts
289
- * confidence_level_to_z_score(0.95); // 1.96
290
- * confidence_level_to_z_score(0.99); // 2.576
289
+ * stats_confidence_level_to_z_score(0.95); // 1.96
290
+ * stats_confidence_level_to_z_score(0.99); // 2.576
291
291
  * ```
292
292
  */
293
- export const confidence_level_to_z_score = (level: number): number => {
293
+ export const stats_confidence_level_to_z_score = (level: number): number => {
294
294
  if (level <= 0 || level >= 1) {
295
295
  throw new Error('Confidence level must be between 0 and 1 (exclusive)');
296
296
  }
297
297
 
298
298
  // Check lookup table first
299
- if (level in CONFIDENCE_Z_SCORES) {
300
- return CONFIDENCE_Z_SCORES[level]!;
299
+ if (level in STATS_CONFIDENCE_Z_SCORES) {
300
+ return STATS_CONFIDENCE_Z_SCORES[level]!;
301
301
  }
302
302
 
303
303
  // For confidence level c, we want z such that P(-z < Z < z) = c
@@ -334,20 +334,201 @@ export interface StatsConfidenceIntervalOptions {
334
334
  export const stats_confidence_interval = (
335
335
  values: Array<number>,
336
336
  options?: StatsConfidenceIntervalOptions,
337
+ ): [number, number] => {
338
+ if (values.length === 0) return [NaN, NaN];
339
+
340
+ const mean = stats_mean(values);
341
+ const std_dev = stats_std_dev(values, mean);
342
+
343
+ return stats_confidence_interval_from_summary(mean, std_dev, values.length, options);
344
+ };
345
+
346
+ /**
347
+ * Calculate confidence interval from summary statistics (mean, std_dev, sample_size).
348
+ * Useful when raw data is not available.
349
+ * @param mean - Mean of the data
350
+ * @param std_dev - Standard deviation of the data
351
+ * @param sample_size - Number of samples
352
+ * @param options - Configuration options
353
+ * @returns [lower_bound, upper_bound]
354
+ */
355
+ export const stats_confidence_interval_from_summary = (
356
+ mean: number,
357
+ std_dev: number,
358
+ sample_size: number,
359
+ options?: StatsConfidenceIntervalOptions,
337
360
  ): [number, number] => {
338
361
  // z_score takes precedence, then confidence_level, then default
339
362
  const z_score =
340
363
  options?.z_score ??
341
- (options?.confidence_level ? confidence_level_to_z_score(options.confidence_level) : null) ??
364
+ (options?.confidence_level
365
+ ? stats_confidence_level_to_z_score(options.confidence_level)
366
+ : null) ??
342
367
  DEFAULT_CONFIDENCE_Z;
343
368
 
344
- if (values.length === 0) return [NaN, NaN];
369
+ if (sample_size === 0) return [NaN, NaN];
345
370
 
346
- const mean = stats_mean(values);
347
- const std_dev = stats_std_dev(values, mean);
348
-
349
- const se = std_dev / Math.sqrt(values.length);
371
+ const se = std_dev / Math.sqrt(sample_size);
350
372
  const margin = z_score * se;
351
373
 
352
374
  return [mean - margin, mean + margin];
353
375
  };
376
+
377
+ // Hypothesis Testing Utilities
378
+ // These functions support statistical significance testing (t-tests, p-values, etc.)
379
+
380
/**
 * Result from Welch's t-test calculation.
 */
export interface StatsWelchTTestResult {
	/** The t-statistic */
	t_statistic: number;
	/** Welch-Satterthwaite degrees of freedom */
	degrees_of_freedom: number;
}

/**
 * Calculate Welch's t-test statistic and degrees of freedom.
 * Welch's t-test is more robust than Student's t-test when variances are unequal.
 *
 * A sample with zero variance contributes nothing to the Welch-Satterthwaite
 * denominator, so a single-observation sample (`n = 1`, `std = 0`) no longer
 * turns the degrees of freedom into `NaN`. When both samples have zero
 * variance the result is still non-finite (`NaN`/`Infinity`); callers are
 * expected to handle that case themselves.
 *
 * @param mean1 - Mean of first sample
 * @param std1 - Standard deviation of first sample
 * @param n1 - Size of first sample
 * @param mean2 - Mean of second sample
 * @param std2 - Standard deviation of second sample
 * @param n2 - Size of second sample
 * @returns the t-statistic and the Welch-Satterthwaite degrees of freedom
 */
export const stats_welch_t_test = (
	mean1: number,
	std1: number,
	n1: number,
	mean2: number,
	std2: number,
	n2: number,
): StatsWelchTTestResult => {
	// Squared standard error of each sample mean (variance / n).
	const se1 = std1 ** 2 / n1;
	const se2 = std2 ** 2 / n2;
	const se_sum = se1 + se2;

	const t_statistic = (mean1 - mean2) / Math.sqrt(se_sum);

	// Welch-Satterthwaite: (se1 + se2)^2 / (se1^2 / (n1 - 1) + se2^2 / (n2 - 1)).
	// Skip a zero-variance term explicitly so `0 / (n - 1)` with `n === 1` is 0, not NaN.
	const term1 = se1 === 0 ? 0 : se1 ** 2 / (n1 - 1);
	const term2 = se2 === 0 ? 0 : se2 ** 2 / (n2 - 1);
	const degrees_of_freedom = se_sum ** 2 / (term1 + term2);

	return {t_statistic, degrees_of_freedom};
};
424
+
425
/**
 * Standard normal CDF approximation (Abramowitz and Stegun formula 26.2.17).
 *
 * Evaluates the upper-tail polynomial in `t = 1 / (1 + 0.2316419 * |x|)` scaled
 * by the standard normal density, then mirrors it for positive `x`.
 * The published absolute error bound for this formula is 7.5e-8.
 *
 * @param x - the z-value to evaluate
 * @returns approximate probability that a standard normal variable is `<= x`
 */
export const stats_normal_cdf = (x: number): number => {
	const t = 1 / (1 + 0.2316419 * Math.abs(x));
	// Standard normal density at x: exp(-x^2 / 2) / sqrt(2 * pi).
	const density = Math.exp((-x * x) / 2) / Math.sqrt(2 * Math.PI);
	// Tail probability P(Z > |x|) via Horner evaluation of the degree-5 polynomial.
	const tail =
		density *
		t *
		(0.31938153 +
			t * (-0.356563782 + t * (1.781477937 + t * (-1.821255978 + t * 1.330274429))));
	return x > 0 ? 1 - tail : tail;
};
435
+
436
/**
 * Log gamma function approximation (Lanczos approximation, g = 7, 9 coefficients).
 *
 * @param z - argument of the gamma function
 * @returns approximate `ln(Gamma(z))`
 */
export const stats_ln_gamma = (z: number): number => {
	const g = 7;
	const c = [
		0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313,
		-176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6,
		1.5056327351493116e-7,
	];

	if (z < 0.5) {
		// Reflection formula: Gamma(z) * Gamma(1 - z) = pi / sin(pi * z).
		return Math.log(Math.PI / Math.sin(Math.PI * z)) - stats_ln_gamma(1 - z);
	}

	const z_adj = z - 1;
	let x = c[0]!;
	for (let i = 1; i < g + 2; i++) {
		x += c[i]! / (z_adj + i);
	}
	const t = z_adj + g + 0.5;
	return 0.5 * Math.log(2 * Math.PI) + (z_adj + 0.5) * Math.log(t) - t + Math.log(x);
};

/**
 * Regularized incomplete beta function `I_x(a, b)`.
 *
 * Evaluated with the continued-fraction expansion using the modified Lentz
 * method. The fraction converges quickly for `x < (a + 1) / (a + b + 2)`;
 * larger `x` is handled through the symmetry `I_x(a, b) = 1 - I_{1-x}(b, a)`.
 *
 * @param x - upper integration limit; values `<= 0` return 0 and values `>= 1` return 1
 * @param a - first shape parameter
 * @param b - second shape parameter
 * @returns approximate value of `I_x(a, b)`
 */
export const stats_incomplete_beta = (x: number, a: number, b: number): number => {
	if (x <= 0) return 0;
	if (x >= 1) return 1;

	// Use symmetry so the continued fraction is always evaluated where it converges fast.
	if (x > (a + 1) / (a + b + 2)) {
		return 1 - stats_incomplete_beta(1 - x, b, a);
	}

	// Prefactor x^a * (1 - x)^b / (a * B(a, b)), computed in log space.
	const ln_beta = stats_ln_gamma(a) + stats_ln_gamma(b) - stats_ln_gamma(a + b);
	const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - ln_beta) / a;

	// Guard against division by (near) zero inside the Lentz recurrence.
	const tiny = 1e-30;

	// Seed the recurrence with the leading term of the fraction,
	// 1 / (1 - (a + b) * x / (a + 1)); starting from a bare 1 would drop it.
	let c = 1;
	let d = 1 - ((a + b) * x) / (a + 1);
	if (Math.abs(d) < tiny) d = tiny;
	d = 1 / d;
	let f = d;

	for (let m = 1; m <= 100; m++) {
		const m2 = 2 * m;

		// Even step
		let aa = (m * (b - m) * x) / ((a + m2 - 1) * (a + m2));
		d = 1 + aa * d;
		if (Math.abs(d) < tiny) d = tiny;
		c = 1 + aa / c;
		if (Math.abs(c) < tiny) c = tiny;
		d = 1 / d;
		f *= d * c;

		// Odd step
		aa = (-(a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1));
		d = 1 + aa * d;
		if (Math.abs(d) < tiny) d = tiny;
		c = 1 + aa / c;
		if (Math.abs(c) < tiny) c = tiny;
		d = 1 / d;
		const delta = d * c;
		f *= delta;

		// Converged once the latest factor no longer changes the product.
		if (Math.abs(delta - 1) < 1e-8) break;
	}

	return front * f;
};
511
+
512
/**
 * Approximate two-tailed p-value from t-distribution.
 * For large df (>100), uses normal approximation.
 * For smaller df, uses incomplete beta function.
 *
 * The sign of `t` is ignored: the magnitude is taken internally, so passing a
 * negative statistic gives the same result as passing its absolute value
 * instead of a value above 1 on the normal-approximation path.
 *
 * @param t - t-statistic (only its absolute value is used)
 * @param df - Degrees of freedom
 * @returns Two-tailed p-value
 */
export const stats_t_distribution_p_value = (t: number, df: number): number => {
	const t_abs = Math.abs(t);

	// Use normal approximation for large df
	if (df > 100) {
		return 2 * (1 - stats_normal_cdf(t_abs));
	}

	// For smaller df the two-tailed p-value equals I_x(df / 2, 1 / 2)
	// with x = df / (df + t^2).
	const x = df / (df + t_abs * t_abs);
	const a = df / 2;
	const b = 0.5;

	return stats_incomplete_beta(x, a, b);
};
package/src/lib/string.ts CHANGED
@@ -97,3 +97,69 @@ export const strip_ansi = (str: string): string => str.replaceAll(/\x1B\[[0-9;]*
97
97
  */
98
98
  export const stringify = (value: unknown): string =>
99
99
  typeof value === 'bigint' ? value + 'n' : (JSON.stringify(value) ?? String(value)); // eslint-disable-line @typescript-eslint/no-unnecessary-condition
100
+
101
/**
 * Calculate the display width of a string in terminal columns.
 * - Strips ANSI escape codes (they have 0 width)
 * - Emojis and other wide characters take 2 columns
 * - Tab characters take 4 columns
 * - Newlines and other control characters take 0 columns
 * - Uses `Intl.Segmenter` to properly handle grapheme clusters (e.g., family emoji "👨‍👩‍👧‍👦")
 * - Combining marks do not widen their base character (e.g. "e" + U+0301 is 1 column)
 *
 * The wide-character detection is a heuristic over code point ranges, not a
 * full East Asian Width table.
 *
 * @param str - the string to measure, may contain ANSI escape codes
 * @returns the estimated number of terminal columns
 */
export const string_display_width = (str: string): number => {
	// Strip ANSI codes first (they have 0 display width)
	const clean = strip_ansi(str);

	let width = 0;
	const segmenter = new Intl.Segmenter();
	for (const {segment} of segmenter.segment(clean)) {
		const code = segment.codePointAt(0)!;

		// Handle control characters
		if (code === 0x09) {
			// Tab = 4 columns
			width += 4;
			continue;
		}
		if (code < 0x20 || (code >= 0x7f && code < 0xa0)) {
			// Other control characters (including newline) = 0 width
			continue;
		}

		// Emoji and other wide characters (rough heuristic)
		// - Most emoji are in range 0x1F300-0x1FAFF (this includes 0x1F600-0x1F64F and 0x1F680-0x1F6FF)
		// - Some are in 0x2600-0x27BF (misc symbols)
		// - CJK characters 0x4E00-0x9FFF also double-width
		let wide =
			(code >= 0x1f300 && code <= 0x1faff) ||
			(code >= 0x2600 && code <= 0x27bf) ||
			(code >= 0x4e00 && code <= 0x9fff);

		// `segment.length` counts UTF-16 code units, so a length above 1 alone says
		// nothing about width (an astral letter or a base plus combining accent both
		// have it). Treat the cluster as wide only when it carries the emoji
		// presentation selector U+FE0F, or when it still has more than one code
		// point after removing combining marks (ZWJ sequences, flags, jamo, ...).
		if (!wide && segment.length > 1) {
			wide =
				segment.includes('\ufe0f') || [...segment.replace(/\p{M}/gu, '')].length > 1;
		}

		width += wide ? 2 : 1;
	}
	return width;
};
149
+
150
/**
 * Pads `str` with spaces until it occupies `target_width` terminal columns,
 * measuring the current width with `string_display_width` rather than `length`.
 * Strings that are already at least that wide are returned without padding.
 *
 * @param str - the text to pad
 * @param target_width - desired width in display columns
 * @param align - `'left'` appends the spaces after the text, anything else prepends them
 * @returns the padded string
 */
export const pad_width = (
	str: string,
	target_width: number,
	align: 'left' | 'right' = 'left',
): string => {
	const missing = target_width - string_display_width(str);
	const fill = missing > 0 ? ' '.repeat(missing) : '';
	return align === 'left' ? str + fill : fill + str;
};