@ebowwa/quant-rust 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +161 -0
- package/bun-ffi.d.ts +54 -0
- package/dist/index.js +576 -0
- package/dist/src/index.d.ts +324 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/types/index.d.ts +403 -0
- package/dist/types/index.d.ts.map +1 -0
- package/native/README.md +62 -0
- package/native/darwin-arm64/libquant_rust.dylib +0 -0
- package/package.json +70 -0
- package/scripts/postinstall.cjs +85 -0
- package/src/ffi.rs +496 -0
- package/src/index.ts +1073 -0
- package/src/indicators/ma.rs +222 -0
- package/src/indicators/mod.rs +18 -0
- package/src/indicators/momentum.rs +353 -0
- package/src/indicators/sr.rs +195 -0
- package/src/indicators/trend.rs +351 -0
- package/src/indicators/volatility.rs +270 -0
- package/src/indicators/volume.rs +213 -0
- package/src/lib.rs +130 -0
- package/src/patterns/breakout.rs +431 -0
- package/src/patterns/chart.rs +772 -0
- package/src/patterns/mod.rs +394 -0
- package/src/patterns/sr.rs +423 -0
- package/src/prediction/amm.rs +338 -0
- package/src/prediction/arbitrage.rs +230 -0
- package/src/prediction/calibration.rs +317 -0
- package/src/prediction/kelly.rs +232 -0
- package/src/prediction/lmsr.rs +194 -0
- package/src/prediction/mod.rs +59 -0
- package/src/prediction/odds.rs +229 -0
- package/src/prediction/pnl.rs +254 -0
- package/src/prediction/risk.rs +228 -0
- package/src/risk/beta.rs +257 -0
- package/src/risk/drawdown.rs +256 -0
- package/src/risk/leverage.rs +201 -0
- package/src/risk/mod.rs +388 -0
- package/src/risk/portfolio.rs +287 -0
- package/src/risk/ratios.rs +290 -0
- package/src/risk/sizing.rs +194 -0
- package/src/risk/var.rs +222 -0
- package/src/stats/cdf.rs +257 -0
- package/src/stats/correlation.rs +225 -0
- package/src/stats/distribution.rs +194 -0
- package/src/stats/hypothesis.rs +177 -0
- package/src/stats/matrix.rs +346 -0
- package/src/stats/mod.rs +257 -0
- package/src/stats/regression.rs +239 -0
- package/src/stats/rolling.rs +193 -0
- package/src/stats/timeseries.rs +263 -0
- package/src/types.rs +224 -0
- package/src/utils/mod.rs +215 -0
- package/src/utils/normalize.rs +192 -0
- package/src/utils/price.rs +167 -0
- package/src/utils/quantiles.rs +177 -0
- package/src/utils/returns.rs +158 -0
- package/src/utils/rolling.rs +97 -0
- package/src/utils/stats.rs +154 -0
- package/types/index.ts +513 -0
package/src/stats/cdf.rs
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
//! Distribution Functions Module
|
|
2
|
+
//!
|
|
3
|
+
//! Cumulative distribution functions and special functions
|
|
4
|
+
|
|
5
|
+
/// Cumulative distribution function of the standard normal distribution.
///
/// Uses the Abramowitz & Stegun rational approximation to the error
/// function (formula 7.1.26), accurate to roughly 1.5e-7 absolute error.
pub fn normal_cdf(x: f64) -> f64 {
    // Coefficients of the A&S 7.1.26 erf approximation.
    const A1: f64 = 0.254829592;
    const A2: f64 = -0.284496736;
    const A3: f64 = 1.421413741;
    const A4: f64 = -1.453152027;
    const A5: f64 = 1.061405429;
    const P: f64 = 0.3275911;

    // erf is odd: evaluate on |x| and restore the sign afterwards.
    let sign = if x < 0.0 { -1.0 } else { 1.0 };
    // Phi(x) = (1 + erf(x / sqrt(2))) / 2
    let z = x.abs() / std::f64::consts::SQRT_2;

    let t = 1.0 / (1.0 + P * z);
    let poly = (((((A5 * t + A4) * t) + A3) * t + A2) * t + A1) * t;
    let erf_abs = 1.0 - poly * (-z * z).exp();

    0.5 * (1.0 + sign * erf_abs)
}
|
|
24
|
+
|
|
25
|
+
/// t-distribution CDF approximation
|
|
26
|
+
pub fn t_cdf(t: f64, df: f64) -> f64 {
|
|
27
|
+
// Approximation: t -> normal as df -> infinity
|
|
28
|
+
if df > 100.0 {
|
|
29
|
+
return normal_cdf(t);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
// Use incomplete beta function approximation
|
|
33
|
+
let x = df / (df + t * t);
|
|
34
|
+
1.0 - 0.5 * incomplete_beta(x, df / 2.0, 0.5)
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/// Gamma function via the Lanczos approximation (g = 7, 9 coefficients).
///
/// For x < 0.5 the Euler reflection formula is applied so the series is
/// only ever evaluated on the well-conditioned half-plane x >= 0.5.
pub fn gamma(x: f64) -> f64 {
    use std::f64::consts::PI;

    // Reflection: Gamma(x) * Gamma(1 - x) = pi / sin(pi * x)
    if x < 0.5 {
        return PI / (PI * x).sin() / gamma(1.0 - x);
    }

    // Lanczos coefficients for g = 7.
    const COEFFS: [f64; 9] = [
        0.99999999999980993,
        676.5203681218851,
        -1259.1392167224028,
        771.32342877765313,
        -176.61502916214059,
        12.507343278686905,
        -0.13857109526572012,
        9.9843695780195716e-6,
        1.5056327351493116e-7,
    ];

    let x = x - 1.0;
    let mut series = COEFFS[0];
    for (i, &coef) in COEFFS.iter().enumerate().skip(1) {
        series += coef / (x + i as f64);
    }

    let t = x + 7.0 + 0.5;
    (2.0 * PI).sqrt() * t.powf(x + 0.5) * (-t).exp() * series
}
|
|
67
|
+
|
|
68
|
+
/// Beta function
|
|
69
|
+
pub fn beta(a: f64, b: f64) -> f64 {
|
|
70
|
+
gamma(a) * gamma(b) / gamma(a + b)
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
/// Incomplete beta function (simplified approximation)
|
|
74
|
+
pub fn incomplete_beta(x: f64, a: f64, b: f64) -> f64 {
|
|
75
|
+
if x <= 0.0 {
|
|
76
|
+
return 0.0;
|
|
77
|
+
}
|
|
78
|
+
if x >= 1.0 {
|
|
79
|
+
return 1.0;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// Use continued fraction expansion for better accuracy
|
|
83
|
+
// This is a simplified version
|
|
84
|
+
let max_iterations = 100;
|
|
85
|
+
let epsilon = 1e-10;
|
|
86
|
+
|
|
87
|
+
// Front term
|
|
88
|
+
let front = x.powf(a) * (1.0 - x).powf(b) / (a * beta(a, b));
|
|
89
|
+
|
|
90
|
+
// Lentz's algorithm for continued fraction
|
|
91
|
+
let mut f = 1.0;
|
|
92
|
+
let mut c = 1.0;
|
|
93
|
+
let mut d = 1.0 - (a + b) * x / (a + 1.0);
|
|
94
|
+
if d.abs() < epsilon {
|
|
95
|
+
d = epsilon;
|
|
96
|
+
}
|
|
97
|
+
d = 1.0 / d;
|
|
98
|
+
|
|
99
|
+
for m in 1..max_iterations {
|
|
100
|
+
let m_f64 = m as f64;
|
|
101
|
+
|
|
102
|
+
// Even step
|
|
103
|
+
let mut numerator = m_f64 * (b - m_f64) * x / ((a + 2.0 * m_f64 - 1.0) * (a + 2.0 * m_f64));
|
|
104
|
+
d = 1.0 + numerator * d;
|
|
105
|
+
if d.abs() < epsilon {
|
|
106
|
+
d = epsilon;
|
|
107
|
+
}
|
|
108
|
+
d = 1.0 / d;
|
|
109
|
+
c = 1.0 + numerator / c;
|
|
110
|
+
if c.abs() < epsilon {
|
|
111
|
+
c = epsilon;
|
|
112
|
+
}
|
|
113
|
+
f *= c * d;
|
|
114
|
+
|
|
115
|
+
// Odd step
|
|
116
|
+
numerator = -(a + m_f64) * (a + b + m_f64) * x / ((a + 2.0 * m_f64) * (a + 2.0 * m_f64 + 1.0));
|
|
117
|
+
d = 1.0 + numerator * d;
|
|
118
|
+
if d.abs() < epsilon {
|
|
119
|
+
d = epsilon;
|
|
120
|
+
}
|
|
121
|
+
d = 1.0 / d;
|
|
122
|
+
c = 1.0 + numerator / c;
|
|
123
|
+
if c.abs() < epsilon {
|
|
124
|
+
c = epsilon;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
let delta = c * d;
|
|
128
|
+
f *= delta;
|
|
129
|
+
|
|
130
|
+
if (delta - 1.0).abs() < epsilon {
|
|
131
|
+
break;
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
front * f
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
/// Chi-squared CDF (approximation using incomplete gamma)
|
|
139
|
+
pub fn chi_squared_cdf(x: f64, df: f64) -> f64 {
|
|
140
|
+
if x <= 0.0 {
|
|
141
|
+
return 0.0;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
let k = df / 2.0;
|
|
145
|
+
lower_incomplete_gamma(x / 2.0, k) / gamma(k)
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
/// Lower incomplete gamma function (simplified)
|
|
149
|
+
fn lower_incomplete_gamma(x: f64, a: f64) -> f64 {
|
|
150
|
+
if x < 0.0 || a <= 0.0 {
|
|
151
|
+
return 0.0;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
// Series expansion
|
|
155
|
+
let max_iterations = 200;
|
|
156
|
+
let epsilon = 1e-10;
|
|
157
|
+
|
|
158
|
+
if x < a + 1.0 {
|
|
159
|
+
// Use series representation
|
|
160
|
+
let mut sum = 1.0 / a;
|
|
161
|
+
let mut term = 1.0 / a;
|
|
162
|
+
|
|
163
|
+
for n in 1..max_iterations {
|
|
164
|
+
term *= x / (a + n as f64);
|
|
165
|
+
sum += term;
|
|
166
|
+
if term.abs() < sum.abs() * epsilon {
|
|
167
|
+
break;
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
sum * x.powf(a) * (-x).exp()
|
|
172
|
+
} else {
|
|
173
|
+
// Use continued fraction
|
|
174
|
+
let mut f = 1.0;
|
|
175
|
+
let mut c = 1.0;
|
|
176
|
+
let mut d = 1.0 / (1.0 + x - a);
|
|
177
|
+
|
|
178
|
+
if d.abs() < epsilon {
|
|
179
|
+
d = epsilon;
|
|
180
|
+
}
|
|
181
|
+
d = 1.0 / d;
|
|
182
|
+
|
|
183
|
+
for n in 1..max_iterations {
|
|
184
|
+
let n_f64 = n as f64;
|
|
185
|
+
let coef = n_f64 * (a - n_f64);
|
|
186
|
+
|
|
187
|
+
let numerator = (1.0 + 2.0 * n_f64) * (x - a) + coef;
|
|
188
|
+
d = 1.0 + numerator * d;
|
|
189
|
+
if d.abs() < epsilon {
|
|
190
|
+
d = epsilon;
|
|
191
|
+
}
|
|
192
|
+
d = 1.0 / d;
|
|
193
|
+
|
|
194
|
+
c = 1.0 + numerator / c;
|
|
195
|
+
if c.abs() < epsilon {
|
|
196
|
+
c = epsilon;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
let delta = c * d;
|
|
200
|
+
f *= delta;
|
|
201
|
+
|
|
202
|
+
if (delta - 1.0).abs() < epsilon {
|
|
203
|
+
break;
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
gamma(a) - x.powf(a) * (-x).exp() * f
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// Unit tests for the distribution functions. Expected values are standard
// textbook constants: normal quantiles, factorials, Gamma(1/2) = sqrt(pi).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_normal_cdf() {
        // Standard normal: P(Z < 0) = 0.5
        // The approximation has slight numerical error, so we use 1e-9 tolerance
        assert!((normal_cdf(0.0) - 0.5).abs() < 1e-9);

        // P(Z < 1.96) ≈ 0.975
        assert!((normal_cdf(1.96) - 0.975).abs() < 0.001);

        // P(Z < -1.96) ≈ 0.025
        assert!((normal_cdf(-1.96) - 0.025).abs() < 0.001);
    }

    #[test]
    fn test_gamma() {
        // Gamma(1) = 1
        assert!((gamma(1.0) - 1.0).abs() < 1e-10);

        // Gamma(5) = 4! = 24
        assert!((gamma(5.0) - 24.0).abs() < 1e-10);

        // Gamma(0.5) = sqrt(pi)
        assert!((gamma(0.5) - std::f64::consts::PI.sqrt()).abs() < 1e-10);
    }

    #[test]
    fn test_beta() {
        // Beta(1, 1) = 1
        assert!((beta(1.0, 1.0) - 1.0).abs() < 1e-10);

        // Beta(2, 2) = 1/6
        assert!((beta(2.0, 2.0) - 1.0 / 6.0).abs() < 1e-10);
    }

    #[test]
    fn test_t_cdf() {
        // For large df, t_cdf should approach normal_cdf
        // (df = 1000 takes the df > 100 normal-approximation branch)
        let t = 1.0;
        let t_cdf_val = t_cdf(t, 1000.0);
        let normal_cdf_val = normal_cdf(t);
        assert!((t_cdf_val - normal_cdf_val).abs() < 0.01);
    }
}
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
//! Correlation Analysis Module
|
|
2
|
+
//!
|
|
3
|
+
//! Pearson and Spearman correlation with statistical significance testing
|
|
4
|
+
|
|
5
|
+
use serde::{Deserialize, Serialize};
|
|
6
|
+
use super::cdf::normal_cdf;
|
|
7
|
+
use crate::utils::mean;
|
|
8
|
+
|
|
9
|
+
/// Correlation result with statistical significance
///
/// Produced by `correlation_with_p_value`: the coefficient together with a
/// two-tailed p-value for the null hypothesis of zero correlation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorrelationResult {
    /// Correlation coefficient
    pub correlation: f64,
    /// P-value (two-tailed)
    // Computed from a normal approximation of the t statistic, so it is
    // approximate for small samples.
    pub p_value: f64,
    /// Sample size
    pub n: usize,
}
|
|
19
|
+
|
|
20
|
+
/// Calculate Pearson correlation coefficient
|
|
21
|
+
pub fn pearson_correlation(x: &[f64], y: &[f64]) -> f64 {
|
|
22
|
+
if x.len() != y.len() || x.is_empty() {
|
|
23
|
+
return 0.0;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
let n = x.len();
|
|
27
|
+
let mean_x = mean(x);
|
|
28
|
+
let mean_y = mean(y);
|
|
29
|
+
|
|
30
|
+
let mut sum_xy = 0.0;
|
|
31
|
+
let mut sum_x2 = 0.0;
|
|
32
|
+
let mut sum_y2 = 0.0;
|
|
33
|
+
|
|
34
|
+
for i in 0..n {
|
|
35
|
+
let dx = x[i] - mean_x;
|
|
36
|
+
let dy = y[i] - mean_y;
|
|
37
|
+
sum_xy += dx * dy;
|
|
38
|
+
sum_x2 += dx * dx;
|
|
39
|
+
sum_y2 += dy * dy;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
let denominator = (sum_x2 * sum_y2).sqrt();
|
|
43
|
+
if denominator > 0.0 {
|
|
44
|
+
sum_xy / denominator
|
|
45
|
+
} else {
|
|
46
|
+
0.0
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/// Calculate Spearman rank correlation
|
|
51
|
+
pub fn spearman_correlation(x: &[f64], y: &[f64]) -> f64 {
|
|
52
|
+
if x.len() != y.len() || x.is_empty() {
|
|
53
|
+
return 0.0;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
// Convert to ranks
|
|
57
|
+
let rank_x = ranks(x);
|
|
58
|
+
let rank_y = ranks(y);
|
|
59
|
+
|
|
60
|
+
pearson_correlation(&rank_x, &rank_y)
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/// Convert array to ranks (handling ties)
///
/// Returns 1-based ranks in the original element order; runs of equal
/// values all receive the average of the ranks they span. Values that do
/// not compare (e.g. NaN) are treated as equal during the sort.
fn ranks(arr: &[f64]) -> Vec<f64> {
    let len = arr.len();

    // Pair each value with its original position, then order by value.
    let mut order: Vec<(usize, f64)> = arr.iter().copied().enumerate().collect();
    order.sort_by(|lhs, rhs| lhs.1.partial_cmp(&rhs.1).unwrap_or(std::cmp::Ordering::Equal));

    let mut out = vec![0.0; len];
    let mut start = 0;

    while start < len {
        // Extend `end` across the run of equal values beginning at `start`.
        let mut end = start;
        while end + 1 < len && order[end].1 == order[end + 1].1 {
            end += 1;
        }

        // Tied values share the mean of the 1-based ranks they occupy.
        let shared_rank = (start + end) as f64 / 2.0 + 1.0;
        for &(original_idx, _) in &order[start..=end] {
            out[original_idx] = shared_rank;
        }

        start = end + 1;
    }

    out
}
|
|
88
|
+
|
|
89
|
+
/// Correlation with p-value (t-test)
|
|
90
|
+
pub fn correlation_with_p_value(x: &[f64], y: &[f64]) -> CorrelationResult {
|
|
91
|
+
let n = x.len();
|
|
92
|
+
if n < 3 {
|
|
93
|
+
return CorrelationResult {
|
|
94
|
+
correlation: 0.0,
|
|
95
|
+
p_value: 1.0,
|
|
96
|
+
n,
|
|
97
|
+
};
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
let r = pearson_correlation(x, y);
|
|
101
|
+
|
|
102
|
+
// t-statistic for correlation
|
|
103
|
+
let r_squared = r * r;
|
|
104
|
+
if r_squared >= 1.0 {
|
|
105
|
+
return CorrelationResult {
|
|
106
|
+
correlation: r,
|
|
107
|
+
p_value: if r.abs() >= 1.0 { 0.0 } else { 1.0 },
|
|
108
|
+
n,
|
|
109
|
+
};
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
let t = r * ((n - 2) as f64 / (1.0 - r_squared)).sqrt();
|
|
113
|
+
|
|
114
|
+
// Approximate p-value using normal approximation for large n
|
|
115
|
+
let p_value = 2.0 * (1.0 - normal_cdf(t.abs()));
|
|
116
|
+
|
|
117
|
+
CorrelationResult {
|
|
118
|
+
correlation: r,
|
|
119
|
+
p_value: p_value.clamp(0.0, 1.0),
|
|
120
|
+
n,
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
/// Calculate correlation matrix for multiple series
|
|
125
|
+
pub fn correlation_matrix(data: &[Vec<f64>]) -> Vec<Vec<f64>> {
|
|
126
|
+
let n = data.len();
|
|
127
|
+
if n == 0 {
|
|
128
|
+
return Vec::new();
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
let mut matrix = vec![vec![0.0; n]; n];
|
|
132
|
+
|
|
133
|
+
for i in 0..n {
|
|
134
|
+
matrix[i][i] = 1.0;
|
|
135
|
+
for j in (i + 1)..n {
|
|
136
|
+
let corr = pearson_correlation(&data[i], &data[j]);
|
|
137
|
+
matrix[i][j] = corr;
|
|
138
|
+
matrix[j][i] = corr;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
matrix
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
/// Calculate covariance
|
|
146
|
+
pub fn covariance(x: &[f64], y: &[f64]) -> f64 {
|
|
147
|
+
if x.len() != y.len() || x.is_empty() {
|
|
148
|
+
return f64::NAN;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
let n = x.len() as f64;
|
|
152
|
+
let mean_x = mean(x);
|
|
153
|
+
let mean_y = mean(y);
|
|
154
|
+
|
|
155
|
+
x.iter()
|
|
156
|
+
.zip(y.iter())
|
|
157
|
+
.map(|(&xi, &yi)| (xi - mean_x) * (yi - mean_y))
|
|
158
|
+
.sum::<f64>() / n
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
// Unit tests. Perfectly linear inputs give correlation exactly ±1, which
// makes the expected values checkable to tight tolerance.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_pearson_correlation() {
        // y = 2x: perfect positive linear relationship
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
        let corr = pearson_correlation(&x, &y);
        assert!((corr - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_pearson_correlation_negative() {
        // Strictly decreasing y: perfect negative linear relationship
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [10.0, 8.0, 6.0, 4.0, 2.0];
        let corr = pearson_correlation(&x, &y);
        assert!((corr - (-1.0)).abs() < 1e-10);
    }

    #[test]
    fn test_spearman_correlation() {
        // Monotone relationship => rank correlation is exactly 1
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
        let corr = spearman_correlation(&x, &y);
        assert!((corr - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_correlation_with_p_value() {
        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
        let result = correlation_with_p_value(&x, &y);

        // Perfect correlation takes the r^2 >= 1 short-circuit: p = 0
        assert!((result.correlation - 1.0).abs() < 1e-10);
        assert!(result.p_value < 0.05); // Highly significant
        assert_eq!(result.n, 5);
    }

    #[test]
    fn test_correlation_matrix() {
        let data = vec![
            vec![1.0, 2.0, 3.0],
            vec![2.0, 4.0, 6.0],
            vec![3.0, 2.0, 1.0],
        ];
        let matrix = correlation_matrix(&data);

        assert_eq!(matrix.len(), 3);
        // Perfect correlation between first two series
        assert!((matrix[0][1] - 1.0).abs() < 1e-10);
        // Diagonal should be 1
        assert!((matrix[0][0] - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_ranks() {
        let arr = [3.0, 1.0, 4.0, 1.0, 5.0];
        let r = ranks(&arr);

        // 1.0 appears at indices 1 and 3, should have rank 1.5 (avg of 1 and 2)
        assert!((r[1] - 1.5).abs() < 1e-10);
        assert!((r[3] - 1.5).abs() < 1e-10);
    }
}
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
//! Distribution Statistics Module
|
|
2
|
+
//!
|
|
3
|
+
//! Comprehensive distribution statistics including moments, skewness, kurtosis
|
|
4
|
+
|
|
5
|
+
use serde::{Deserialize, Serialize};
|
|
6
|
+
use crate::utils::{mean, median, mode, variance, std_dev, min, max};
|
|
7
|
+
use crate::utils::quantiles::{quartiles, iqr};
|
|
8
|
+
|
|
9
|
+
/// Distribution statistics result
///
/// Returned by `distribution_stats`; every field is zeroed when the input
/// slice is empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DistributionStats {
    /// Mean (average)
    pub mean: f64,
    /// Median (50th percentile)
    pub median: f64,
    /// Mode (most frequent value)
    pub mode: f64,
    /// Standard deviation
    // NOTE(review): filled via std_dev(data, false) — presumably the
    // population (not sample) form; confirm against utils::std_dev.
    pub std_dev: f64,
    /// Variance
    pub variance: f64,
    /// Skewness (Pearson's moment coefficient)
    pub skewness: f64,
    /// Kurtosis (excess kurtosis, normal = 0)
    pub kurtosis: f64,
    /// Minimum value
    pub min: f64,
    /// Maximum value
    pub max: f64,
    /// Range (max - min)
    pub range: f64,
    /// First quartile (25th percentile)
    pub q1: f64,
    /// Second quartile / Median (50th percentile)
    pub q2: f64,
    /// Third quartile (75th percentile)
    pub q3: f64,
    /// Interquartile range (Q3 - Q1)
    pub iqr: f64,
}
|
|
41
|
+
|
|
42
|
+
/// Moments of a distribution
///
/// Returned by `moments`; all fields are zeroed for an empty input slice.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Moments {
    /// First moment (mean)
    pub mean: f64,
    /// Second central moment (variance)
    // Computed with an n (population) divisor in `moments`.
    pub variance: f64,
    /// Third standardized moment (skewness)
    pub skewness: f64,
    /// Fourth standardized moment (excess kurtosis)
    pub kurtosis: f64,
}
|
|
54
|
+
|
|
55
|
+
/// Calculate comprehensive distribution statistics
|
|
56
|
+
pub fn distribution_stats(data: &[f64]) -> DistributionStats {
|
|
57
|
+
if data.is_empty() {
|
|
58
|
+
return DistributionStats {
|
|
59
|
+
mean: 0.0,
|
|
60
|
+
median: 0.0,
|
|
61
|
+
mode: 0.0,
|
|
62
|
+
std_dev: 0.0,
|
|
63
|
+
variance: 0.0,
|
|
64
|
+
skewness: 0.0,
|
|
65
|
+
kurtosis: 0.0,
|
|
66
|
+
min: 0.0,
|
|
67
|
+
max: 0.0,
|
|
68
|
+
range: 0.0,
|
|
69
|
+
q1: 0.0,
|
|
70
|
+
q2: 0.0,
|
|
71
|
+
q3: 0.0,
|
|
72
|
+
iqr: 0.0,
|
|
73
|
+
};
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
let avg = mean(data);
|
|
77
|
+
let sd = std_dev(data, false);
|
|
78
|
+
let (q1, q2, q3) = quartiles(data);
|
|
79
|
+
|
|
80
|
+
// Calculate skewness (Pearson's moment coefficient)
|
|
81
|
+
let skewness = if sd > 0.0 {
|
|
82
|
+
let n = data.len() as f64;
|
|
83
|
+
data.iter()
|
|
84
|
+
.map(|v| ((v - avg) / sd).powi(3))
|
|
85
|
+
.sum::<f64>() / n
|
|
86
|
+
} else {
|
|
87
|
+
0.0
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
// Calculate kurtosis (excess kurtosis, normal = 0)
|
|
91
|
+
let kurtosis = if sd > 0.0 {
|
|
92
|
+
let n = data.len() as f64;
|
|
93
|
+
data.iter()
|
|
94
|
+
.map(|v| ((v - avg) / sd).powi(4))
|
|
95
|
+
.sum::<f64>() / n - 3.0 // Excess kurtosis
|
|
96
|
+
} else {
|
|
97
|
+
0.0
|
|
98
|
+
};
|
|
99
|
+
|
|
100
|
+
let min_val = min(data);
|
|
101
|
+
let max_val = max(data);
|
|
102
|
+
|
|
103
|
+
DistributionStats {
|
|
104
|
+
mean: avg,
|
|
105
|
+
median: median(data),
|
|
106
|
+
mode: mode(data),
|
|
107
|
+
std_dev: sd,
|
|
108
|
+
variance: variance(data, false),
|
|
109
|
+
skewness,
|
|
110
|
+
kurtosis,
|
|
111
|
+
min: min_val,
|
|
112
|
+
max: max_val,
|
|
113
|
+
range: max_val - min_val,
|
|
114
|
+
q1,
|
|
115
|
+
q2,
|
|
116
|
+
q3,
|
|
117
|
+
iqr: iqr(data),
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
/// Calculate moments of distribution
|
|
122
|
+
pub fn moments(data: &[f64]) -> Moments {
|
|
123
|
+
if data.is_empty() {
|
|
124
|
+
return Moments {
|
|
125
|
+
mean: 0.0,
|
|
126
|
+
variance: 0.0,
|
|
127
|
+
skewness: 0.0,
|
|
128
|
+
kurtosis: 0.0,
|
|
129
|
+
};
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
let avg = mean(data);
|
|
133
|
+
let sd = std_dev(data, false);
|
|
134
|
+
let n = data.len() as f64;
|
|
135
|
+
|
|
136
|
+
let mut m2 = 0.0;
|
|
137
|
+
let mut m3 = 0.0;
|
|
138
|
+
let mut m4 = 0.0;
|
|
139
|
+
|
|
140
|
+
for &x in data {
|
|
141
|
+
let d = x - avg;
|
|
142
|
+
m2 += d * d;
|
|
143
|
+
m3 += d * d * d;
|
|
144
|
+
m4 += d * d * d * d;
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
m2 /= n;
|
|
148
|
+
m3 /= n;
|
|
149
|
+
m4 /= n;
|
|
150
|
+
|
|
151
|
+
Moments {
|
|
152
|
+
mean: avg,
|
|
153
|
+
variance: m2,
|
|
154
|
+
skewness: if sd > 0.0 { m3 / (sd * sd * sd) } else { 0.0 },
|
|
155
|
+
kurtosis: if sd > 0.0 { m4 / (m2 * m2) - 3.0 } else { 0.0 },
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
// Unit tests for the summary statistics. The fixed 1..=5 sample has a
// hand-checkable mean/median of 3 and range of 4.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_distribution_stats() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        let stats = distribution_stats(&data);

        assert!((stats.mean - 3.0).abs() < 1e-10);
        assert!((stats.median - 3.0).abs() < 1e-10);
        assert!((stats.min - 1.0).abs() < 1e-10);
        assert!((stats.max - 5.0).abs() < 1e-10);
        assert!((stats.range - 4.0).abs() < 1e-10);
    }

    #[test]
    fn test_moments() {
        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
        let m = moments(&data);

        assert!((m.mean - 3.0).abs() < 1e-10);
        assert!(m.variance > 0.0);
    }

    #[test]
    fn test_empty_data() {
        // Empty input must take the zeroed early-return path, not panic.
        let data: [f64; 0] = [];
        let stats = distribution_stats(&data);
        assert_eq!(stats.mean, 0.0);
        assert_eq!(stats.median, 0.0);

        let m = moments(&data);
        assert_eq!(m.mean, 0.0);
    }
}
|