@fuzdev/fuz_util 0.43.0 → 0.44.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark_baseline.d.ts.map +1 -1
- package/dist/benchmark_baseline.js +5 -32
- package/dist/benchmark_format.d.ts +7 -12
- package/dist/benchmark_format.d.ts.map +1 -1
- package/dist/benchmark_format.js +19 -80
- package/dist/benchmark_stats.d.ts.map +1 -1
- package/dist/benchmark_stats.js +3 -120
- package/dist/git.d.ts +12 -0
- package/dist/git.d.ts.map +1 -1
- package/dist/git.js +14 -0
- package/dist/maths.d.ts +4 -0
- package/dist/maths.d.ts.map +1 -1
- package/dist/maths.js +8 -0
- package/dist/source_json.d.ts +4 -4
- package/dist/stats.d.ts +58 -4
- package/dist/stats.d.ts.map +1 -1
- package/dist/stats.js +151 -11
- package/dist/string.d.ts +13 -0
- package/dist/string.d.ts.map +1 -1
- package/dist/string.js +58 -0
- package/dist/time.d.ts +4 -0
- package/dist/time.d.ts.map +1 -1
- package/dist/time.js +4 -0
- package/package.json +1 -1
- package/src/lib/benchmark_baseline.ts +5 -40
- package/src/lib/benchmark_format.ts +19 -84
- package/src/lib/benchmark_stats.ts +4 -141
- package/src/lib/git.ts +24 -0
- package/src/lib/maths.ts +8 -0
- package/src/lib/stats.ts +193 -12
- package/src/lib/string.ts +66 -0
- package/src/lib/time.ts +5 -0
|
@@ -1,48 +1,7 @@
|
|
|
1
1
|
import type {BenchmarkResult, BenchmarkGroup} from './benchmark_types.js';
|
|
2
|
-
import {time_unit_detect_best, time_format,
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
* Calculate the display width of a string in terminal columns.
|
|
6
|
-
* Emojis and other wide characters take 2 columns.
|
|
7
|
-
*/
|
|
8
|
-
const string_display_width = (str: string): number => {
|
|
9
|
-
let width = 0;
|
|
10
|
-
for (const char of str) {
|
|
11
|
-
const code = char.codePointAt(0)!;
|
|
12
|
-
// Emoji and other wide characters (rough heuristic)
|
|
13
|
-
// - Most emoji are in range 0x1F300-0x1FAFF
|
|
14
|
-
// - Some are in 0x2600-0x27BF (misc symbols)
|
|
15
|
-
// - CJK characters 0x4E00-0x9FFF also double-width but not handling here
|
|
16
|
-
if (
|
|
17
|
-
(code >= 0x1f300 && code <= 0x1faff) ||
|
|
18
|
-
(code >= 0x2600 && code <= 0x27bf) ||
|
|
19
|
-
(code >= 0x1f600 && code <= 0x1f64f) ||
|
|
20
|
-
(code >= 0x1f680 && code <= 0x1f6ff)
|
|
21
|
-
) {
|
|
22
|
-
width += 2;
|
|
23
|
-
} else {
|
|
24
|
-
width += 1;
|
|
25
|
-
}
|
|
26
|
-
}
|
|
27
|
-
return width;
|
|
28
|
-
};
|
|
29
|
-
|
|
30
|
-
/**
|
|
31
|
-
* Pad a string to a target display width (accounting for wide characters).
|
|
32
|
-
*/
|
|
33
|
-
const pad_to_width = (
|
|
34
|
-
str: string,
|
|
35
|
-
target_width: number,
|
|
36
|
-
align: 'left' | 'right' = 'left',
|
|
37
|
-
): string => {
|
|
38
|
-
const current_width = string_display_width(str);
|
|
39
|
-
const padding = Math.max(0, target_width - current_width);
|
|
40
|
-
if (align === 'left') {
|
|
41
|
-
return str + ' '.repeat(padding);
|
|
42
|
-
} else {
|
|
43
|
-
return ' '.repeat(padding) + str;
|
|
44
|
-
}
|
|
45
|
-
};
|
|
2
|
+
import {time_unit_detect_best, time_format, TIME_UNIT_DISPLAY} from './time.js';
|
|
3
|
+
import {string_display_width, pad_width} from './string.js';
|
|
4
|
+
import {format_number} from './maths.js';
|
|
46
5
|
|
|
47
6
|
/**
|
|
48
7
|
* Format results as an ASCII table with percentiles, min/max, and relative performance.
|
|
@@ -53,19 +12,13 @@ const pad_to_width = (
|
|
|
53
12
|
* @example
|
|
54
13
|
* ```ts
|
|
55
14
|
* console.log(benchmark_format_table(results));
|
|
56
|
-
* //
|
|
57
|
-
* // │
|
|
58
|
-
* //
|
|
59
|
-
* // │
|
|
60
|
-
* // │
|
|
61
|
-
* //
|
|
15
|
+
* // ┌─────────────┬────────────┬────────────┬──────────┬──────────┬──────────┬──────────┬──────────┬──────────┬──────────┐
|
|
16
|
+
* // │ Task Name │ ops/sec │ median(μs) │ p75 (μs) │ p90 (μs) │ p95 (μs) │ p99 (μs) │ min (μs) │ max (μs) │ vs Best │
|
|
17
|
+
* // ├─────────────┼────────────┼────────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤
|
|
18
|
+
* // │ slugify v2 │ 1,237,144 │ 0.81 │ 0.85 │ 0.89 │ 0.95 │ 1.20 │ 0.72 │ 2.45 │ baseline │
|
|
19
|
+
* // │ slugify │ 261,619 │ 3.82 │ 3.95 │ 4.12 │ 4.35 │ 5.10 │ 3.21 │ 12.45 │ 4.73x │
|
|
20
|
+
* // └─────────────┴────────────┴────────────┴──────────┴──────────┴──────────┴──────────┴──────────┴──────────┴──────────┘
|
|
62
21
|
* ```
|
|
63
|
-
*
|
|
64
|
-
* **Performance tier animals:**
|
|
65
|
-
* - 🐆 Cheetah: >1M ops/sec (extremely fast)
|
|
66
|
-
* - 🐇 Rabbit: >100K ops/sec (fast)
|
|
67
|
-
* - 🐢 Turtle: >10K ops/sec (moderate)
|
|
68
|
-
* - 🐌 Snail: <10K ops/sec (slow)
|
|
69
22
|
*/
|
|
70
23
|
export const benchmark_format_table = (results: Array<BenchmarkResult>): string => {
|
|
71
24
|
if (results.length === 0) return '(no results)';
|
|
@@ -73,7 +26,7 @@ export const benchmark_format_table = (results: Array<BenchmarkResult>): string
|
|
|
73
26
|
// Detect best unit for all results
|
|
74
27
|
const mean_times = results.map((r) => r.stats.mean_ns);
|
|
75
28
|
const unit = time_unit_detect_best(mean_times);
|
|
76
|
-
const unit_str =
|
|
29
|
+
const unit_str = TIME_UNIT_DISPLAY[unit];
|
|
77
30
|
|
|
78
31
|
// Find fastest for relative comparison
|
|
79
32
|
const fastest_ops = Math.max(...results.map((r) => r.stats.ops_per_second));
|
|
@@ -82,7 +35,6 @@ export const benchmark_format_table = (results: Array<BenchmarkResult>): string
|
|
|
82
35
|
|
|
83
36
|
// Header with unit
|
|
84
37
|
rows.push([
|
|
85
|
-
'',
|
|
86
38
|
'Task Name',
|
|
87
39
|
'ops/sec',
|
|
88
40
|
`median (${unit_str})`,
|
|
@@ -97,7 +49,6 @@ export const benchmark_format_table = (results: Array<BenchmarkResult>): string
|
|
|
97
49
|
|
|
98
50
|
// Data rows - all use same unit
|
|
99
51
|
results.forEach((r) => {
|
|
100
|
-
const tier = get_perf_tier(r.stats.ops_per_second);
|
|
101
52
|
const ops_sec = benchmark_format_number(r.stats.ops_per_second, 2);
|
|
102
53
|
const median = time_format(r.stats.median_ns, unit, 2).replace(unit_str, '').trim();
|
|
103
54
|
const p75 = time_format(r.stats.p75_ns, unit, 2).replace(unit_str, '').trim();
|
|
@@ -111,7 +62,7 @@ export const benchmark_format_table = (results: Array<BenchmarkResult>): string
|
|
|
111
62
|
const ratio = fastest_ops / r.stats.ops_per_second;
|
|
112
63
|
const vs_best = ratio === 1.0 ? 'baseline' : `${ratio.toFixed(2)}x`;
|
|
113
64
|
|
|
114
|
-
rows.push([
|
|
65
|
+
rows.push([r.name, ops_sec, median, p75, p90, p95, p99, min, max, vs_best]);
|
|
115
66
|
});
|
|
116
67
|
|
|
117
68
|
// Calculate column widths (using display width for proper emoji handling)
|
|
@@ -126,7 +77,7 @@ export const benchmark_format_table = (results: Array<BenchmarkResult>): string
|
|
|
126
77
|
lines.push('┌' + widths.map((w) => '─'.repeat(w + 2)).join('┬') + '┐');
|
|
127
78
|
|
|
128
79
|
// Header
|
|
129
|
-
const header = rows[0]!.map((cell, i) => ' ' +
|
|
80
|
+
const header = rows[0]!.map((cell, i) => ' ' + pad_width(cell, widths[i]!) + ' ').join('│');
|
|
130
81
|
lines.push('│' + header + '│');
|
|
131
82
|
|
|
132
83
|
// Header separator
|
|
@@ -136,11 +87,11 @@ export const benchmark_format_table = (results: Array<BenchmarkResult>): string
|
|
|
136
87
|
for (let i = 1; i < rows.length; i++) {
|
|
137
88
|
const row = rows[i]!.map((cell, col_i) => {
|
|
138
89
|
const width = widths[col_i]!;
|
|
139
|
-
// Left-align
|
|
140
|
-
if (col_i === 0
|
|
141
|
-
return ' ' +
|
|
90
|
+
// Left-align task name, right-align numbers
|
|
91
|
+
if (col_i === 0) {
|
|
92
|
+
return ' ' + pad_width(cell, width, 'left') + ' ';
|
|
142
93
|
} else {
|
|
143
|
-
return ' ' +
|
|
94
|
+
return ' ' + pad_width(cell, width, 'right') + ' ';
|
|
144
95
|
}
|
|
145
96
|
}).join('│');
|
|
146
97
|
lines.push('│' + row + '│');
|
|
@@ -173,7 +124,7 @@ export const benchmark_format_markdown = (results: Array<BenchmarkResult>): stri
|
|
|
173
124
|
// Detect best unit for all results
|
|
174
125
|
const mean_times = results.map((r) => r.stats.mean_ns);
|
|
175
126
|
const unit = time_unit_detect_best(mean_times);
|
|
176
|
-
const unit_str =
|
|
127
|
+
const unit_str = TIME_UNIT_DISPLAY[unit];
|
|
177
128
|
|
|
178
129
|
// Find fastest for relative comparison
|
|
179
130
|
const fastest_ops = Math.max(...results.map((r) => r.stats.ops_per_second));
|
|
@@ -356,24 +307,8 @@ export const benchmark_format_table_grouped = (
|
|
|
356
307
|
return sections.join('\n');
|
|
357
308
|
};
|
|
358
309
|
|
|
359
|
-
// TODO consider extracting to a general format utility module when more formatters are needed
|
|
360
310
|
/**
|
|
361
311
|
* Format a number with fixed decimal places and thousands separators.
|
|
312
|
+
* @see {@link format_number} in maths.ts for the underlying implementation.
|
|
362
313
|
*/
|
|
363
|
-
export const benchmark_format_number =
|
|
364
|
-
if (!isFinite(n)) return String(n);
|
|
365
|
-
return n.toFixed(decimals).replace(/\B(?=(\d{3})+(?!\d))/g, ',');
|
|
366
|
-
};
|
|
367
|
-
|
|
368
|
-
/**
|
|
369
|
-
* Get performance tier symbol based on ops/sec.
|
|
370
|
-
*/
|
|
371
|
-
const get_perf_tier = (ops_per_sec: number): string => {
|
|
372
|
-
if (ops_per_sec >= 1_000_000) return '🐆'; // > 1M ops/sec (cheetah - extremely fast)
|
|
373
|
-
if (ops_per_sec >= 100_000) return '🐇'; // > 100K ops/sec (rabbit - fast)
|
|
374
|
-
if (ops_per_sec >= 10_000) return '🐢'; // > 10K ops/sec (turtle - moderate)
|
|
375
|
-
return '🐌'; // < 10K ops/sec (snail - slow)
|
|
376
|
-
};
|
|
377
|
-
|
|
378
|
-
/** Unit labels for display (μs instead of us). */
|
|
379
|
-
const UNIT_LABELS: Record<TimeUnit, string> = {ns: 'ns', us: 'μs', ms: 'ms', s: 's'};
|
|
314
|
+
export const benchmark_format_number = format_number;
|
|
@@ -14,6 +14,8 @@ import {
|
|
|
14
14
|
stats_min_max,
|
|
15
15
|
stats_confidence_interval,
|
|
16
16
|
stats_outliers_mad,
|
|
17
|
+
stats_welch_t_test,
|
|
18
|
+
stats_t_distribution_p_value,
|
|
17
19
|
} from './stats.js';
|
|
18
20
|
|
|
19
21
|
/**
|
|
@@ -226,7 +228,7 @@ export const benchmark_stats_compare = (
|
|
|
226
228
|
// When there's no variance, any difference is 100% reliable (p=0) or identical (p=1)
|
|
227
229
|
p_value = a.mean_ns === b.mean_ns ? 1 : 0;
|
|
228
230
|
} else {
|
|
229
|
-
const {t_statistic, degrees_of_freedom} =
|
|
231
|
+
const {t_statistic, degrees_of_freedom} = stats_welch_t_test(
|
|
230
232
|
a.mean_ns,
|
|
231
233
|
a.std_dev_ns,
|
|
232
234
|
a.sample_size,
|
|
@@ -235,7 +237,7 @@ export const benchmark_stats_compare = (
|
|
|
235
237
|
b.sample_size,
|
|
236
238
|
);
|
|
237
239
|
// Calculate two-tailed p-value using t-distribution approximation
|
|
238
|
-
p_value =
|
|
240
|
+
p_value = stats_t_distribution_p_value(Math.abs(t_statistic), degrees_of_freedom);
|
|
239
241
|
}
|
|
240
242
|
|
|
241
243
|
// Cohen's d effect size
|
|
@@ -307,142 +309,3 @@ export const benchmark_stats_compare = (
|
|
|
307
309
|
recommendation,
|
|
308
310
|
};
|
|
309
311
|
};
|
|
310
|
-
|
|
311
|
-
/**
|
|
312
|
-
* Calculate Welch's t-test statistic and degrees of freedom.
|
|
313
|
-
* Welch's t-test is more robust than Student's t-test when variances are unequal.
|
|
314
|
-
*/
|
|
315
|
-
const welch_t_test = (
|
|
316
|
-
mean1: number,
|
|
317
|
-
std1: number,
|
|
318
|
-
n1: number,
|
|
319
|
-
mean2: number,
|
|
320
|
-
std2: number,
|
|
321
|
-
n2: number,
|
|
322
|
-
): {t_statistic: number; degrees_of_freedom: number} => {
|
|
323
|
-
const var1 = std1 ** 2;
|
|
324
|
-
const var2 = std2 ** 2;
|
|
325
|
-
|
|
326
|
-
const se1 = var1 / n1;
|
|
327
|
-
const se2 = var2 / n2;
|
|
328
|
-
|
|
329
|
-
const t_statistic = (mean1 - mean2) / Math.sqrt(se1 + se2);
|
|
330
|
-
|
|
331
|
-
// Welch-Satterthwaite degrees of freedom
|
|
332
|
-
const numerator = (se1 + se2) ** 2;
|
|
333
|
-
const denominator = se1 ** 2 / (n1 - 1) + se2 ** 2 / (n2 - 1);
|
|
334
|
-
const degrees_of_freedom = numerator / denominator;
|
|
335
|
-
|
|
336
|
-
return {t_statistic, degrees_of_freedom};
|
|
337
|
-
};
|
|
338
|
-
|
|
339
|
-
/**
|
|
340
|
-
* Approximate p-value from t-distribution using the approximation formula.
|
|
341
|
-
* This avoids requiring a full t-distribution table or library.
|
|
342
|
-
* For large df (>30), this approximation is very accurate.
|
|
343
|
-
*/
|
|
344
|
-
const t_distribution_p_value = (t: number, df: number): number => {
|
|
345
|
-
// Use normal approximation for large df
|
|
346
|
-
if (df > 100) {
|
|
347
|
-
// Standard normal CDF approximation
|
|
348
|
-
return 2 * (1 - normal_cdf(t));
|
|
349
|
-
}
|
|
350
|
-
|
|
351
|
-
// For smaller df, use a more accurate approximation
|
|
352
|
-
// Based on the incomplete beta function relationship
|
|
353
|
-
const x = df / (df + t * t);
|
|
354
|
-
const a = df / 2;
|
|
355
|
-
const b = 0.5;
|
|
356
|
-
|
|
357
|
-
// Approximation of regularized incomplete beta function
|
|
358
|
-
// This is accurate to about 4 decimal places for typical use cases
|
|
359
|
-
const beta_approx = incomplete_beta_approx(x, a, b);
|
|
360
|
-
return beta_approx;
|
|
361
|
-
};
|
|
362
|
-
|
|
363
|
-
/**
|
|
364
|
-
* Standard normal CDF approximation (Abramowitz and Stegun formula 7.1.26).
|
|
365
|
-
*/
|
|
366
|
-
const normal_cdf = (x: number): number => {
|
|
367
|
-
const t = 1 / (1 + 0.2316419 * Math.abs(x));
|
|
368
|
-
const d = 0.3989423 * Math.exp((-x * x) / 2);
|
|
369
|
-
const p =
|
|
370
|
-
d * t * (0.3193815 + t * (-0.3565638 + t * (1.781478 + t * (-1.821256 + t * 1.330274))));
|
|
371
|
-
return x > 0 ? 1 - p : p;
|
|
372
|
-
};
|
|
373
|
-
|
|
374
|
-
/**
|
|
375
|
-
* Approximate regularized incomplete beta function for p-value calculation.
|
|
376
|
-
* Uses continued fraction expansion for reasonable accuracy.
|
|
377
|
-
*/
|
|
378
|
-
const incomplete_beta_approx = (x: number, a: number, b: number): number => {
|
|
379
|
-
// Simple approximation using the relationship between beta and normal distributions
|
|
380
|
-
// For our use case (t-distribution p-values), this provides sufficient accuracy
|
|
381
|
-
if (x <= 0) return 0;
|
|
382
|
-
if (x >= 1) return 1;
|
|
383
|
-
|
|
384
|
-
// Use symmetry if needed
|
|
385
|
-
if (x > (a + 1) / (a + b + 2)) {
|
|
386
|
-
return 1 - incomplete_beta_approx(1 - x, b, a);
|
|
387
|
-
}
|
|
388
|
-
|
|
389
|
-
// Continued fraction approximation (first few terms)
|
|
390
|
-
const lnBeta = ln_gamma(a) + ln_gamma(b) - ln_gamma(a + b);
|
|
391
|
-
const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - lnBeta) / a;
|
|
392
|
-
|
|
393
|
-
// Simple continued fraction (limited iterations for speed)
|
|
394
|
-
let f = 1;
|
|
395
|
-
let c = 1;
|
|
396
|
-
let d = 0;
|
|
397
|
-
|
|
398
|
-
for (let m = 1; m <= 100; m++) {
|
|
399
|
-
const m2 = 2 * m;
|
|
400
|
-
|
|
401
|
-
// Even step
|
|
402
|
-
let aa = (m * (b - m) * x) / ((a + m2 - 1) * (a + m2));
|
|
403
|
-
d = 1 + aa * d;
|
|
404
|
-
if (Math.abs(d) < 1e-30) d = 1e-30;
|
|
405
|
-
c = 1 + aa / c;
|
|
406
|
-
if (Math.abs(c) < 1e-30) c = 1e-30;
|
|
407
|
-
d = 1 / d;
|
|
408
|
-
f *= d * c;
|
|
409
|
-
|
|
410
|
-
// Odd step
|
|
411
|
-
aa = (-(a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1));
|
|
412
|
-
d = 1 + aa * d;
|
|
413
|
-
if (Math.abs(d) < 1e-30) d = 1e-30;
|
|
414
|
-
c = 1 + aa / c;
|
|
415
|
-
if (Math.abs(c) < 1e-30) c = 1e-30;
|
|
416
|
-
d = 1 / d;
|
|
417
|
-
const delta = d * c;
|
|
418
|
-
f *= delta;
|
|
419
|
-
|
|
420
|
-
if (Math.abs(delta - 1) < 1e-8) break;
|
|
421
|
-
}
|
|
422
|
-
|
|
423
|
-
return front * f;
|
|
424
|
-
};
|
|
425
|
-
|
|
426
|
-
/**
|
|
427
|
-
* Log gamma function approximation (Lanczos approximation).
|
|
428
|
-
*/
|
|
429
|
-
const ln_gamma = (z: number): number => {
|
|
430
|
-
const g = 7;
|
|
431
|
-
const c = [
|
|
432
|
-
0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313,
|
|
433
|
-
-176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6,
|
|
434
|
-
1.5056327351493116e-7,
|
|
435
|
-
];
|
|
436
|
-
|
|
437
|
-
if (z < 0.5) {
|
|
438
|
-
return Math.log(Math.PI / Math.sin(Math.PI * z)) - ln_gamma(1 - z);
|
|
439
|
-
}
|
|
440
|
-
|
|
441
|
-
const z_adj = z - 1;
|
|
442
|
-
let x = c[0]!;
|
|
443
|
-
for (let i = 1; i < g + 2; i++) {
|
|
444
|
-
x += c[i]! / (z_adj + i);
|
|
445
|
-
}
|
|
446
|
-
const t = z_adj + g + 0.5;
|
|
447
|
-
return 0.5 * Math.log(2 * Math.PI) + (z_adj + 0.5) * Math.log(t) - t + Math.log(x);
|
|
448
|
-
};
|
package/src/lib/git.ts
CHANGED
|
@@ -6,6 +6,30 @@ import type {Flavored} from './types.js';
|
|
|
6
6
|
import {to_file_path} from './path.js';
|
|
7
7
|
import {fs_exists} from './fs.js';
|
|
8
8
|
|
|
9
|
+
/**
 * Minimal snapshot of a git checkout, as produced by `git_info_get`.
 * Each field is `null` when the corresponding value could not be read.
 */
export interface GitInfo {
	/** Output of `git rev-parse HEAD`, or `null` when unavailable. */
	commit: string | null;
	/** Output of `git rev-parse --abbrev-ref HEAD`, or `null` when unavailable. */
	branch: string | null;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Reads the current commit hash and branch name without throwing on a
 * rejected `spawn_out` call.
 *
 * Both `git rev-parse` invocations are started before either is awaited.
 * A rejected call, or one that yields empty/missing stdout, produces `null`
 * for that field.
 *
 * @param options - forwarded unchanged to `spawn_out` for both commands
 * @returns the trimmed commit and branch, each `null` when unavailable
 */
export const git_info_get = async (options?: SpawnOptions): Promise<GitInfo> => {
	// Runs `git rev-parse <args>` and normalizes failure or blank output to `null`.
	const rev_parse = async (...args: Array<string>): Promise<string | null> => {
		const {stdout} = await spawn_out('git', ['rev-parse', ...args], options).catch(() => ({
			stdout: null,
		}));
		return stdout?.trim() || null;
	};

	const [commit, branch] = await Promise.all([
		rev_parse('HEAD'),
		rev_parse('--abbrev-ref', 'HEAD'),
	]);

	return {commit, branch};
};
|
|
32
|
+
|
|
9
33
|
export const GitOrigin = z.string();
|
|
10
34
|
export type GitOrigin = Flavored<string, 'GitOrigin'>;
|
|
11
35
|
|
package/src/lib/maths.ts
CHANGED
|
@@ -89,3 +89,11 @@ export const GR_9 = 76.01315561749645;
|
|
|
89
89
|
* golden ratio/mean constants, `1/(GR**9)`, useful for scaling: https://wikipedia.org/wiki/Golden_ratio
|
|
90
90
|
*/
|
|
91
91
|
export const GR_9i = 0.013155617496424835;
|
|
92
|
+
|
|
93
|
+
/**
 * Format a number with fixed decimal places and thousands separators.
 *
 * Separators are inserted into the integer part only; the fractional digits
 * are emitted exactly as `toFixed` produced them.
 *
 * @param n - the number to format
 * @param decimals - digits after the decimal point, passed to `toFixed`
 * @returns the formatted string, or `String(n)` for `NaN` and `±Infinity`
 */
export const format_number = (n: number, decimals: number = 2): string => {
	if (!isFinite(n)) return String(n);

	const fixed = n.toFixed(decimals);

	// Split at the decimal point so grouping never touches the fraction;
	// otherwise 4+ decimals would produce output like `0.123,456`.
	const dot_index = fixed.indexOf('.');
	const integer_part = dot_index === -1 ? fixed : fixed.slice(0, dot_index);
	const fraction_part = dot_index === -1 ? '' : fixed.slice(dot_index);

	return integer_part.replace(/\B(?=(\d{3})+(?!\d))/g, ',') + fraction_part;
};
|
package/src/lib/stats.ts
CHANGED
|
@@ -272,7 +272,7 @@ export const stats_outliers_mad = (
|
|
|
272
272
|
/**
|
|
273
273
|
* Common z-scores for confidence intervals.
|
|
274
274
|
*/
|
|
275
|
-
export const
|
|
275
|
+
export const STATS_CONFIDENCE_Z_SCORES: Record<number, number> = {
|
|
276
276
|
0.8: 1.282,
|
|
277
277
|
0.9: 1.645,
|
|
278
278
|
0.95: 1.96,
|
|
@@ -286,18 +286,18 @@ export const CONFIDENCE_Z_SCORES: Record<number, number> = {
|
|
|
286
286
|
*
|
|
287
287
|
* @example
|
|
288
288
|
* ```ts
|
|
289
|
-
*
|
|
290
|
-
*
|
|
289
|
+
* stats_confidence_level_to_z_score(0.95); // 1.96
|
|
290
|
+
* stats_confidence_level_to_z_score(0.99); // 2.576
|
|
291
291
|
* ```
|
|
292
292
|
*/
|
|
293
|
-
export const
|
|
293
|
+
export const stats_confidence_level_to_z_score = (level: number): number => {
|
|
294
294
|
if (level <= 0 || level >= 1) {
|
|
295
295
|
throw new Error('Confidence level must be between 0 and 1 (exclusive)');
|
|
296
296
|
}
|
|
297
297
|
|
|
298
298
|
// Check lookup table first
|
|
299
|
-
if (level in
|
|
300
|
-
return
|
|
299
|
+
if (level in STATS_CONFIDENCE_Z_SCORES) {
|
|
300
|
+
return STATS_CONFIDENCE_Z_SCORES[level]!;
|
|
301
301
|
}
|
|
302
302
|
|
|
303
303
|
// For confidence level c, we want z such that P(-z < Z < z) = c
|
|
@@ -334,20 +334,201 @@ export interface StatsConfidenceIntervalOptions {
|
|
|
334
334
|
export const stats_confidence_interval = (
	values: Array<number>,
	options?: StatsConfidenceIntervalOptions,
): [number, number] => {
	const sample_size = values.length;

	// With no samples there is no interval to report.
	if (sample_size === 0) {
		return [NaN, NaN];
	}

	// Reduce the raw values to summary statistics, then delegate the interval math.
	const average = stats_mean(values);
	return stats_confidence_interval_from_summary(
		average,
		stats_std_dev(values, average),
		sample_size,
		options,
	);
};
|
|
345
|
+
|
|
346
|
+
/**
 * Calculate a confidence interval from summary statistics (mean, std_dev, sample_size).
 * Useful when the raw data is not available.
 *
 * The z-score is chosen in this order: `options.z_score`, then the z-score
 * derived from `options.confidence_level`, then `DEFAULT_CONFIDENCE_Z`.
 *
 * @param mean - Mean of the data
 * @param std_dev - Standard deviation of the data
 * @param sample_size - Number of samples
 * @param options - Configuration options
 * @returns [lower_bound, upper_bound], or `[NaN, NaN]` when `sample_size` is 0
 */
export const stats_confidence_interval_from_summary = (
	mean: number,
	std_dev: number,
	sample_size: number,
	options?: StatsConfidenceIntervalOptions,
): [number, number] => {
	// Test for presence rather than truthiness, so an explicit `confidence_level: 0`
	// is validated by `stats_confidence_level_to_z_score` instead of being
	// silently replaced by the default.
	const confidence_level = options?.confidence_level;
	const z_score =
		options?.z_score ??
		(confidence_level == null
			? DEFAULT_CONFIDENCE_Z
			: stats_confidence_level_to_z_score(confidence_level));

	if (sample_size === 0) return [NaN, NaN];

	// Standard error of the mean, scaled by the z-score to get the half-width.
	const se = std_dev / Math.sqrt(sample_size);
	const margin = z_score * se;

	return [mean - margin, mean + margin];
};
|
|
376
|
+
|
|
377
|
+
// Hypothesis Testing Utilities
|
|
378
|
+
// These functions support statistical significance testing (t-tests, p-values, etc.)
|
|
379
|
+
|
|
380
|
+
/**
 * Output of `stats_welch_t_test`: the test statistic plus the
 * degrees of freedom that go with it.
 */
export interface StatsWelchTTestResult {
	/** Difference of the two means divided by its combined standard error. */
	t_statistic: number;
	/** Degrees of freedom from the Welch-Satterthwaite equation. */
	degrees_of_freedom: number;
}
|
|
389
|
+
|
|
390
|
+
/**
 * Welch's unequal-variances t-test, computed from summary statistics.
 * Unlike Student's t-test it does not assume both samples share a variance.
 *
 * @param mean1 - Mean of first sample
 * @param std1 - Standard deviation of first sample
 * @param n1 - Size of first sample
 * @param mean2 - Mean of second sample
 * @param std2 - Standard deviation of second sample
 * @param n2 - Size of second sample
 * @returns the t-statistic and the Welch-Satterthwaite degrees of freedom
 */
export const stats_welch_t_test = (
	mean1: number,
	std1: number,
	n1: number,
	mean2: number,
	std2: number,
	n2: number,
): StatsWelchTTestResult => {
	// Squared standard error of each sample mean (variance / n).
	const sq_err_1 = std1 ** 2 / n1;
	const sq_err_2 = std2 ** 2 / n2;
	const combined_sq_err = sq_err_1 + sq_err_2;

	return {
		t_statistic: (mean1 - mean2) / Math.sqrt(combined_sq_err),
		// Welch-Satterthwaite approximation
		degrees_of_freedom:
			combined_sq_err ** 2 / (sq_err_1 ** 2 / (n1 - 1) + sq_err_2 ** 2 / (n2 - 1)),
	};
};
|
|
424
|
+
|
|
425
|
+
/**
 * Standard normal CDF approximation (Abramowitz and Stegun formula 26.2.17).
 * The published absolute error bound for this polynomial is 7.5e-8.
 *
 * @param x - the z-value to evaluate
 * @returns the approximate probability that a standard normal variable is <= `x`
 */
export const stats_normal_cdf = (x: number): number => {
	const t = 1 / (1 + 0.2316419 * Math.abs(x));
	// Standard normal density at x; computing 1/sqrt(2*pi) avoids a truncated literal.
	const density = Math.exp((-x * x) / 2) / Math.sqrt(2 * Math.PI);
	// Upper-tail probability for |x|, using the full-precision published coefficients.
	const tail =
		density *
		t *
		(0.31938153 +
			t * (-0.356563782 + t * (1.781477937 + t * (-1.821255978 + t * 1.330274429))));
	return x > 0 ? 1 - tail : tail;
};
|
|
435
|
+
|
|
436
|
+
/**
 * Natural log of the gamma function, via the Lanczos approximation (g = 7, 9 coefficients).
 * Arguments below 0.5 are handled with the reflection formula.
 *
 * @param z - the argument to evaluate
 * @returns an approximation of `ln(Γ(z))`
 */
export const stats_ln_gamma = (z: number): number => {
	const g = 7;
	const c = [
		0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313,
		-176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6,
		1.5056327351493116e-7,
	];

	if (z < 0.5) {
		// Reflection: ln Γ(z) = ln(π / sin(πz)) - ln Γ(1 - z)
		return Math.log(Math.PI / Math.sin(Math.PI * z)) - stats_ln_gamma(1 - z);
	}

	const z_adj = z - 1;
	let x = c[0]!;
	for (let i = 1; i < g + 2; i++) {
		x += c[i]! / (z_adj + i);
	}
	const t = z_adj + g + 0.5;
	return 0.5 * Math.log(2 * Math.PI) + (z_adj + 0.5) * Math.log(t) - t + Math.log(x);
};

/**
 * Regularized incomplete beta function `I_x(a, b)`.
 *
 * Evaluates the continued-fraction expansion with the modified Lentz method:
 *
 *   I_x(a, b) = x^a (1-x)^b / (a B(a, b)) * 1 / (1 + d1 / (1 + d2 / (1 + ...)))
 *
 * The symmetry `I_x(a, b) = 1 - I_{1-x}(b, a)` is applied first when `x` is
 * above `(a + 1) / (a + b + 2)`, the region where the fraction converges slowly.
 *
 * @param x - upper integration limit; values <= 0 return 0 and values >= 1 return 1
 * @param a - first shape parameter
 * @param b - second shape parameter
 * @returns an approximation of `I_x(a, b)`
 */
export const stats_incomplete_beta = (x: number, a: number, b: number): number => {
	if (x <= 0) return 0;
	if (x >= 1) return 1;

	// Switch to the complementary form; the swapped call always lands below its
	// own threshold, so this recurses at most once.
	if (x > (a + 1) / (a + b + 2)) {
		return 1 - stats_incomplete_beta(1 - x, b, a);
	}

	// Prefactor x^a (1-x)^b / (a B(a, b)), computed in log space.
	const ln_beta = stats_ln_gamma(a) + stats_ln_gamma(b) - stats_ln_gamma(a + b);
	const front = Math.exp(Math.log(x) * a + Math.log(1 - x) * b - ln_beta) / a;

	const tiny = 1e-30; // stands in for zero so Lentz's method never divides by it

	// Seed the recurrence with the leading term d1 = -(a+b)x/(a+1).
	// Leaving it out evaluates a different fraction, e.g. I_0.5(1, 1) comes out
	// as 0.25 instead of 0.5.
	let c = 1;
	let d = 1 - ((a + b) * x) / (a + 1);
	if (Math.abs(d) < tiny) d = tiny;
	d = 1 / d;
	let f = d;

	for (let m = 1; m <= 100; m++) {
		const m2 = 2 * m;

		// Even-indexed coefficient d_{2m}
		let aa = (m * (b - m) * x) / ((a + m2 - 1) * (a + m2));
		d = 1 + aa * d;
		if (Math.abs(d) < tiny) d = tiny;
		c = 1 + aa / c;
		if (Math.abs(c) < tiny) c = tiny;
		d = 1 / d;
		f *= d * c;

		// Odd-indexed coefficient d_{2m+1}
		aa = (-(a + m) * (a + b + m) * x) / ((a + m2) * (a + m2 + 1));
		d = 1 + aa * d;
		if (Math.abs(d) < tiny) d = tiny;
		c = 1 + aa / c;
		if (Math.abs(c) < tiny) c = tiny;
		d = 1 / d;
		const delta = d * c;
		f *= delta;

		// Stop once a full even/odd step no longer changes the estimate.
		if (Math.abs(delta - 1) < 1e-8) break;
	}

	return front * f;
};
|
|
511
|
+
|
|
512
|
+
/**
 * Approximate two-tailed p-value from the t-distribution.
 * For large df (>100), uses the normal approximation.
 * For smaller df, uses the identity `p = I_{df / (df + t^2)}(df / 2, 1 / 2)`
 * with the regularized incomplete beta function.
 *
 * @param t - t-statistic; only its magnitude is used, so either sign is accepted
 * @param df - Degrees of freedom
 * @returns Two-tailed p-value
 */
export const stats_t_distribution_p_value = (t: number, df: number): number => {
	// The test is two-tailed, so the sign carries no information. Without this,
	// a negative t on the normal branch would produce a value above 1.
	const abs_t = Math.abs(t);

	if (df > 100) {
		return 2 * (1 - stats_normal_cdf(abs_t));
	}

	const x = df / (df + abs_t * abs_t);
	return stats_incomplete_beta(x, df / 2, 0.5);
};
|
package/src/lib/string.ts
CHANGED
|
@@ -97,3 +97,69 @@ export const strip_ansi = (str: string): string => str.replaceAll(/\x1B\[[0-9;]*
|
|
|
97
97
|
*/
|
|
98
98
|
export const stringify = (value: unknown): string =>
|
|
99
99
|
typeof value === 'bigint' ? value + 'n' : (JSON.stringify(value) ?? String(value)); // eslint-disable-line @typescript-eslint/no-unnecessary-condition
|
|
100
|
+
|
|
101
|
+
/**
 * Calculate the display width of a string in terminal columns.
 * - Strips ANSI escape codes (they have 0 width)
 * - Emojis and other wide characters take 2 columns
 * - Tab characters take 4 columns
 * - Newlines and other control characters take 0 columns
 * - Uses `Intl.Segmenter` so each grapheme cluster is measured once
 *   (e.g., an emoji joined with zero-width joiners counts as a single glyph)
 *
 * The wide-character detection is a rough range-based heuristic, not a full
 * East Asian Width table.
 *
 * @param str - text to measure; may contain ANSI escape codes
 * @returns the estimated number of terminal columns
 */
export const string_display_width = (str: string): number => {
	// Strip ANSI codes first (they have 0 display width)
	const clean = strip_ansi(str);

	let width = 0;
	const segmenter = new Intl.Segmenter();
	for (const {segment} of segmenter.segment(clean)) {
		const code = segment.codePointAt(0)!;

		// Handle control characters
		if (code === 0x09) {
			// Tab = 4 columns
			width += 4;
			continue;
		}
		if (code < 0x20 || (code >= 0x7f && code < 0xa0)) {
			// Other control characters (including newline) = 0 width
			continue;
		}

		// Width is decided by the cluster's first code point:
		// - 0x1F000-0x1FAFF: emoji and pictographs, including regional indicators (flags)
		// - 0x2600-0x27BF: misc symbols and dingbats
		// - 0x4E00-0x9FFF and 0x20000-0x3FFFD: CJK ideographs
		const base_is_wide =
			(code >= 0x1f000 && code <= 0x1faff) ||
			(code >= 0x2600 && code <= 0x27bf) ||
			(code >= 0x4e00 && code <= 0x9fff) ||
			(code >= 0x20000 && code <= 0x3fffd);

		// A cluster whose base is not wide is still wide when it carries a zero-width
		// joiner or the emoji variation selector (keycaps, text symbols shown as emoji).
		// `segment.length` is not used for this: it counts UTF-16 code units, so it
		// would report letters with combining marks ("e\u0301") and narrow astral
		// characters as 2 columns.
		const is_emoji_sequence = segment.includes('\u200d') || segment.includes('\ufe0f');

		width += base_is_wide || is_emoji_sequence ? 2 : 1;
	}
	return width;
};
|
|
149
|
+
|
|
150
|
+
/**
 * Pads `str` with spaces until it occupies `target_width` terminal columns,
 * as measured by `string_display_width`. Strings already at or beyond the
 * target are returned unchanged.
 *
 * @param str - text to pad
 * @param target_width - desired width in columns
 * @param align - `'left'` appends the padding, `'right'` prepends it
 * @returns the padded string
 */
export const pad_width = (
	str: string,
	target_width: number,
	align: 'left' | 'right' = 'left',
): string => {
	const missing_columns = target_width - string_display_width(str);
	const filler = ' '.repeat(Math.max(0, missing_columns));
	return align === 'left' ? str + filler : filler + str;
};
|