@ebowwa/quant-rust 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +161 -0
- package/bun-ffi.d.ts +54 -0
- package/dist/index.js +576 -0
- package/dist/src/index.d.ts +324 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/types/index.d.ts +403 -0
- package/dist/types/index.d.ts.map +1 -0
- package/native/README.md +62 -0
- package/native/darwin-arm64/libquant_rust.dylib +0 -0
- package/package.json +70 -0
- package/scripts/postinstall.cjs +85 -0
- package/src/ffi.rs +496 -0
- package/src/index.ts +1073 -0
- package/src/indicators/ma.rs +222 -0
- package/src/indicators/mod.rs +18 -0
- package/src/indicators/momentum.rs +353 -0
- package/src/indicators/sr.rs +195 -0
- package/src/indicators/trend.rs +351 -0
- package/src/indicators/volatility.rs +270 -0
- package/src/indicators/volume.rs +213 -0
- package/src/lib.rs +130 -0
- package/src/patterns/breakout.rs +431 -0
- package/src/patterns/chart.rs +772 -0
- package/src/patterns/mod.rs +394 -0
- package/src/patterns/sr.rs +423 -0
- package/src/prediction/amm.rs +338 -0
- package/src/prediction/arbitrage.rs +230 -0
- package/src/prediction/calibration.rs +317 -0
- package/src/prediction/kelly.rs +232 -0
- package/src/prediction/lmsr.rs +194 -0
- package/src/prediction/mod.rs +59 -0
- package/src/prediction/odds.rs +229 -0
- package/src/prediction/pnl.rs +254 -0
- package/src/prediction/risk.rs +228 -0
- package/src/risk/beta.rs +257 -0
- package/src/risk/drawdown.rs +256 -0
- package/src/risk/leverage.rs +201 -0
- package/src/risk/mod.rs +388 -0
- package/src/risk/portfolio.rs +287 -0
- package/src/risk/ratios.rs +290 -0
- package/src/risk/sizing.rs +194 -0
- package/src/risk/var.rs +222 -0
- package/src/stats/cdf.rs +257 -0
- package/src/stats/correlation.rs +225 -0
- package/src/stats/distribution.rs +194 -0
- package/src/stats/hypothesis.rs +177 -0
- package/src/stats/matrix.rs +346 -0
- package/src/stats/mod.rs +257 -0
- package/src/stats/regression.rs +239 -0
- package/src/stats/rolling.rs +193 -0
- package/src/stats/timeseries.rs +263 -0
- package/src/types.rs +224 -0
- package/src/utils/mod.rs +215 -0
- package/src/utils/normalize.rs +192 -0
- package/src/utils/price.rs +167 -0
- package/src/utils/quantiles.rs +177 -0
- package/src/utils/returns.rs +158 -0
- package/src/utils/rolling.rs +97 -0
- package/src/utils/stats.rs +154 -0
- package/types/index.ts +513 -0
package/src/stats/mod.rs
ADDED
@@ -0,0 +1,257 @@
+//! Statistical Analysis Module
+//!
+//! Provides statistical functions including correlation, regression, distributions, etc.
+
+mod cdf;
+mod correlation;
+mod distribution;
+mod hypothesis;
+mod matrix;
+mod regression;
+mod rolling;
+mod timeseries;
+
+// Re-export types
+pub use correlation::CorrelationResult;
+pub use distribution::{DistributionStats, Moments};
+pub use hypothesis::{ADFResult, TTestResult};
+pub use regression::{MultipleRegressionResult, RegressionResult};
+pub use rolling::{RollingRegressionResult, RollingStats};
+pub use timeseries::{DoubleExponentialResult, TripleExponentialResult};
+
+// Re-export existing functions (for backward compatibility)
+pub use correlation::{correlation_with_p_value, covariance, pearson_correlation, spearman_correlation, correlation_matrix};
+pub use cdf::{beta, gamma, incomplete_beta, normal_cdf, t_cdf};
+pub use distribution::{distribution_stats, moments};
+pub use hypothesis::{adf_test, one_sample_t_test, two_sample_t_test};
+pub use matrix::{determinant, matrix_inverse, matrix_multiply, matrix_vector_multiply, solve_linear_system, transpose};
+pub use regression::{linear_regression, multiple_regression};
+pub use rolling::{rolling_correlation, rolling_regression, rolling_stats, rolling_z_score};
+pub use timeseries::{
+    autocorrelation, double_exponential_smoothing, exponential_smoothing,
+    partial_autocorrelation, triple_exponential_smoothing,
+};
+
+// Legacy functions (for backward compatibility with existing lib.rs)
+use serde::{Deserialize, Serialize};
+
+/// Linear regression result (legacy format)
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct LegacyRegressionResult {
+    /// Slope (beta coefficient)
+    pub slope: f64,
+    /// Intercept (alpha)
+    pub intercept: f64,
+    /// R-squared (coefficient of determination)
+    pub r_squared: f64,
+    /// Standard error of the slope
+    pub std_error: f64,
+    /// Number of observations
+    pub n: usize,
+}
+
+/// Distribution statistics (legacy format)
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct LegacyDistributionStats {
+    /// Mean
+    pub mean: f64,
+    /// Median
+    pub median: f64,
+    /// Standard deviation
+    pub std_dev: f64,
+    /// Variance
+    pub variance: f64,
+    /// Skewness (asymmetry measure)
+    pub skewness: f64,
+    /// Kurtosis (tail heaviness, excess over normal)
+    pub kurtosis: f64,
+    /// Minimum value
+    pub min: f64,
+    /// Maximum value
+    pub max: f64,
+    /// Sample size
+    pub count: usize,
+}
+
+/// Autocorrelation result
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct AutocorrelationResult {
+    /// Lag values
+    pub lags: Vec<usize>,
+    /// Autocorrelation coefficients
+    pub autocorrelations: Vec<f64>,
+    /// Confidence bound (typically 1.96 / sqrt(n) for 95%)
+    pub confidence_bound: f64,
+}
+
+/// Calculate Pearson correlation coefficient (legacy)
+pub fn correlation(x: &[f64], y: &[f64]) -> f64 {
+    pearson_correlation(x, y)
+}
+
+/// Perform linear regression (legacy format)
+pub fn linear_regression_legacy(x: &[f64], y: &[f64]) -> LegacyRegressionResult {
+    let result = linear_regression(x, y);
+    LegacyRegressionResult {
+        slope: result.slope,
+        intercept: result.intercept,
+        r_squared: result.r_squared,
+        std_error: result.standard_error,
+        n: result.predictions.len(),
+    }
+}
+
+/// Calculate comprehensive distribution statistics (legacy format)
+pub fn distribution_statistics(data: &[f64]) -> LegacyDistributionStats {
+    let stats = distribution_stats(data);
+    LegacyDistributionStats {
+        mean: stats.mean,
+        median: stats.median,
+        std_dev: stats.std_dev,
+        variance: stats.variance,
+        skewness: stats.skewness,
+        kurtosis: stats.kurtosis,
+        min: stats.min,
+        max: stats.max,
+        count: if data.is_empty() { 0 } else { data.len() },
+    }
+}
+
+/// Calculate autocorrelation function (legacy format)
+pub fn autocorrelation_legacy(data: &[f64], max_lag: usize) -> AutocorrelationResult {
+    if data.is_empty() || max_lag == 0 {
+        return AutocorrelationResult {
+            lags: vec![],
+            autocorrelations: vec![],
+            confidence_bound: 0.0,
+        };
+    }
+
+    let n = data.len();
+    let mean_val = crate::utils::mean(data);
+    let variance_val: f64 = data.iter()
+        .map(|x| (x - mean_val).powi(2))
+        .sum::<f64>() / n as f64;
+
+    if variance_val == 0.0 {
+        return AutocorrelationResult {
+            lags: (0..=max_lag).collect(),
+            autocorrelations: vec![0.0; max_lag + 1],
+            confidence_bound: 0.0,
+        };
+    }
+
+    let max_lag = max_lag.min(n - 1);
+    let lags: Vec<usize> = (0..=max_lag).collect();
+    let mut autocorrelations = Vec::with_capacity(max_lag + 1);
+
+    for &lag in &lags {
+        if lag == 0 {
+            autocorrelations.push(1.0);
+        } else {
+            let mut cov = 0.0;
+            for i in lag..n {
+                cov += (data[i] - mean_val) * (data[i - lag] - mean_val);
+            }
+            cov /= n as f64;
+            autocorrelations.push(cov / variance_val);
+        }
+    }
+
+    // 95% confidence bound
+    let confidence_bound = 1.96 / (n as f64).sqrt();
+
+    AutocorrelationResult {
+        lags,
+        autocorrelations,
+        confidence_bound,
+    }
+}
+
+/// Calculate percentile of a dataset
+pub fn percentile(data: &[f64], p: f64) -> f64 {
+    crate::utils::percentile(data, p)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_correlation() {
+        let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
+        let y = vec![2.0, 4.0, 6.0, 8.0, 10.0];
+        let corr = correlation(&x, &y);
+        assert!((corr - 1.0).abs() < 1e-10);
+    }
+
+    #[test]
+    fn test_linear_regression_legacy() {
+        let x = vec![1.0, 2.0, 3.0, 4.0, 5.0];
+        let y = vec![2.0, 4.0, 6.0, 8.0, 10.0];
+        let result = linear_regression_legacy(&x, &y);
+        assert!((result.slope - 2.0).abs() < 1e-10);
+        assert!((result.intercept).abs() < 1e-10);
+        assert!((result.r_squared - 1.0).abs() < 1e-10);
+    }
+
+    #[test]
+    fn test_distribution_stats_legacy() {
+        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
+        let stats = distribution_statistics(&data);
+        assert_eq!(stats.mean, 3.0);
+        assert_eq!(stats.median, 3.0);
+        assert_eq!(stats.min, 1.0);
+        assert_eq!(stats.max, 5.0);
+    }
+
+    #[test]
+    fn test_pearson_correlation() {
+        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
+        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
+        let corr = pearson_correlation(&x, &y);
+        assert!((corr - 1.0).abs() < 1e-10);
+    }
+
+    #[test]
+    fn test_spearman_correlation() {
+        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
+        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
+        let corr = spearman_correlation(&x, &y);
+        assert!((corr - 1.0).abs() < 1e-10);
+    }
+
+    #[test]
+    fn test_normal_cdf() {
+        // The approximation has slight numerical error, so we use 1e-9 tolerance
+        assert!((normal_cdf(0.0) - 0.5).abs() < 1e-9);
+    }
+
+    #[test]
+    fn test_gamma() {
+        assert!((gamma(5.0) - 24.0).abs() < 1e-10);
+    }
+
+    #[test]
+    fn test_distribution_stats() {
+        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
+        let stats = distribution_stats(&data);
+        assert!((stats.mean - 3.0).abs() < 1e-10);
+        assert!((stats.median - 3.0).abs() < 1e-10);
+    }
+
+    #[test]
+    fn test_one_sample_t_test() {
+        let sample = [5.0, 5.5, 6.0, 6.5, 7.0];
+        let result = one_sample_t_test(&sample, 5.0);
+        assert!(result.t_statistic > 0.0);
+    }
+
+    #[test]
+    fn test_rolling_stats() {
+        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
+        let stats = rolling_stats(&data, 3);
+        assert_eq!(stats.mean.len(), 3);
+        assert!((stats.mean[0] - 2.0).abs() < 1e-10);
+    }
+}
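
For orientation, a minimal sketch of how the legacy helpers above can be called, assuming the crate exposes this module as `quant_rust::stats` (that path is an inference from the package name, not confirmed by the diff; the function signatures and field names do come from mod.rs itself):

// Hypothetical caller -- module path assumed, signatures taken from mod.rs above.
use quant_rust::stats::{autocorrelation_legacy, distribution_statistics};

fn describe(returns: &[f64]) {
    // Legacy-format summary: moments, extremes, and sample size.
    let stats = distribution_statistics(returns);
    println!(
        "mean={:.4} std={:.4} skew={:.4} kurt={:.4} n={}",
        stats.mean, stats.std_dev, stats.skewness, stats.kurtosis, stats.count
    );

    // ACF out to lag 10; coefficients outside +/- confidence_bound
    // (1.96 / sqrt(n)) differ from zero at roughly the 95% level.
    let acf = autocorrelation_legacy(returns, 10);
    for (lag, rho) in acf.lags.iter().zip(&acf.autocorrelations) {
        let mark = if rho.abs() > acf.confidence_bound { " *" } else { "" };
        println!("lag {:>2}: {:+.3}{}", lag, rho, mark);
    }
}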

package/src/stats/regression.rs
ADDED
@@ -0,0 +1,239 @@
+//! Regression Analysis Module
+//!
+//! Simple and multiple linear regression with statistical diagnostics
+
+use serde::{Deserialize, Serialize};
+use crate::utils::mean;
+use super::matrix::{transpose, matrix_multiply, matrix_vector_multiply, matrix_inverse};
+
+/// Simple linear regression result
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RegressionResult {
+    /// Slope (beta coefficient)
+    pub slope: f64,
+    /// Intercept (alpha)
+    pub intercept: f64,
+    /// R-squared (coefficient of determination)
+    pub r_squared: f64,
+    /// Adjusted R-squared
+    pub adjusted_r_squared: f64,
+    /// Standard error of the estimate
+    pub standard_error: f64,
+    /// Predicted values
+    pub predictions: Vec<f64>,
+    /// Residuals (actual - predicted)
+    pub residuals: Vec<f64>,
+}
+
+/// Multiple regression result
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct MultipleRegressionResult {
+    /// Coefficients for each predictor
+    pub coefficients: Vec<f64>,
+    /// Intercept
+    pub intercept: f64,
+    /// R-squared
+    pub r_squared: f64,
+    /// Predicted values
+    pub predictions: Vec<f64>,
+    /// Residuals
+    pub residuals: Vec<f64>,
+}
+
+/// Perform simple linear regression (y = slope * x + intercept)
+pub fn linear_regression(x: &[f64], y: &[f64]) -> RegressionResult {
+    if x.len() != y.len() || x.len() < 2 {
+        return RegressionResult {
+            slope: 0.0,
+            intercept: 0.0,
+            r_squared: 0.0,
+            adjusted_r_squared: 0.0,
+            standard_error: 0.0,
+            predictions: Vec::new(),
+            residuals: Vec::new(),
+        };
+    }
+
+    let n = x.len();
+    let mean_x = mean(x);
+    let mean_y = mean(y);
+
+    // Calculate slope and intercept
+    let mut sum_xy = 0.0;
+    let mut sum_x2 = 0.0;
+
+    for i in 0..n {
+        sum_xy += (x[i] - mean_x) * (y[i] - mean_y);
+        sum_x2 += (x[i] - mean_x) * (x[i] - mean_x);
+    }
+
+    let slope = if sum_x2 > 0.0 { sum_xy / sum_x2 } else { 0.0 };
+    let intercept = mean_y - slope * mean_x;
+
+    // Calculate predictions and residuals
+    let predictions: Vec<f64> = x.iter().map(|&xi| slope * xi + intercept).collect();
+    let residuals: Vec<f64> = y.iter()
+        .zip(predictions.iter())
+        .map(|(&yi, &pred)| yi - pred)
+        .collect();
+
+    // R-squared
+    let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
+    let ss_tot: f64 = y.iter().map(|yi| (yi - mean_y).powi(2)).sum();
+    let r_squared = if ss_tot > 0.0 { 1.0 - ss_res / ss_tot } else { 0.0 };
+
+    // Adjusted R-squared
+    let adjusted_r_squared = if n > 2 {
+        1.0 - ((1.0 - r_squared) * (n - 1) as f64) / (n - 2) as f64
+    } else {
+        r_squared
+    };
+
+    // Standard error of estimate
+    let standard_error = if n > 2 {
+        (ss_res / (n - 2) as f64).sqrt()
+    } else {
+        0.0
+    };
+
+    RegressionResult {
+        slope,
+        intercept,
+        r_squared,
+        adjusted_r_squared,
+        standard_error,
+        predictions,
+        residuals,
+    }
+}
+
+/// Perform multiple linear regression (OLS)
+pub fn multiple_regression(y: &[f64], x_matrix: &[Vec<f64>]) -> MultipleRegressionResult {
+    let n = y.len();
+    let k = x_matrix.len(); // Number of predictors
+
+    if n < k + 1 || k == 0 {
+        return MultipleRegressionResult {
+            coefficients: vec![0.0; k],
+            intercept: 0.0,
+            r_squared: 0.0,
+            predictions: Vec::new(),
+            residuals: Vec::new(),
+        };
+    }
+
+    // Add intercept term (column of 1s)
+    let mut x_design: Vec<Vec<f64>> = Vec::with_capacity(n);
+    for i in 0..n {
+        let mut row = vec![1.0];
+        for col in x_matrix {
+            row.push(col[i]);
+        }
+        x_design.push(row);
+    }
+
+    // Normal equations: (X'X)^(-1) X'y
+    let xt = transpose(&x_design);
+    let xtx = matrix_multiply(&xt, &x_design);
+
+    let xtx_inv = match matrix_inverse(&xtx) {
+        Some(inv) => inv,
+        None => {
+            return MultipleRegressionResult {
+                coefficients: vec![0.0; k],
+                intercept: 0.0,
+                r_squared: 0.0,
+                predictions: Vec::new(),
+                residuals: Vec::new(),
+            }
+        }
+    };
+
+    let xty = matrix_vector_multiply(&xt, y);
+    let beta = matrix_vector_multiply(&xtx_inv, &xty);
+
+    let intercept = beta[0];
+    let coefficients = beta[1..].to_vec();
+
+    // Predictions
+    let predictions: Vec<f64> = (0..n)
+        .map(|i| {
+            let mut pred = intercept;
+            for (j, coeff) in coefficients.iter().enumerate() {
+                pred += coeff * x_matrix[j][i];
+            }
+            pred
+        })
+        .collect();
+
+    // Residuals
+    let residuals: Vec<f64> = y.iter()
+        .zip(predictions.iter())
+        .map(|(&yi, &pred)| yi - pred)
+        .collect();
+
+    // R-squared
+    let mean_y = mean(y);
+    let ss_tot: f64 = y.iter().map(|yi| (yi - mean_y).powi(2)).sum();
+    let ss_res: f64 = residuals.iter().map(|r| r * r).sum();
+    let r_squared = if ss_tot > 0.0 { 1.0 - ss_res / ss_tot } else { 0.0 };
+
+    MultipleRegressionResult {
+        coefficients,
+        intercept,
+        r_squared,
+        predictions,
+        residuals,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_linear_regression() {
+        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
+        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
+        let result = linear_regression(&x, &y);
+
+        assert!((result.slope - 2.0).abs() < 1e-10);
+        assert!(result.intercept.abs() < 1e-10);
+        assert!((result.r_squared - 1.0).abs() < 1e-10);
+        assert_eq!(result.predictions.len(), 5);
+        assert_eq!(result.residuals.len(), 5);
+    }
+
+    #[test]
+    fn test_linear_regression_with_intercept() {
+        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
+        let y = [3.0, 5.0, 7.0, 9.0, 11.0]; // y = 2x + 1
+        let result = linear_regression(&x, &y);
+
+        assert!((result.slope - 2.0).abs() < 1e-10);
+        assert!((result.intercept - 1.0).abs() < 1e-10);
+    }
+
+    #[test]
+    fn test_multiple_regression() {
+        let y = [5.0, 7.0, 9.0, 11.0, 13.0]; // y = x1 + x2 + 2
+        let x1 = [1.0, 2.0, 3.0, 4.0, 5.0];
+        let x2 = [2.0, 2.0, 2.0, 2.0, 2.0];
+        let x_matrix = vec![x1.to_vec(), x2.to_vec()];
+
+        let result = multiple_regression(&y, &x_matrix);
+
+        assert_eq!(result.coefficients.len(), 2);
+        assert!((result.intercept - 2.0).abs() < 1e-8 || result.intercept.abs() < 1e-8);
+    }
+
+    #[test]
+    fn test_empty_regression() {
+        let x: [f64; 0] = [];
+        let y: [f64; 0] = [];
+        let result = linear_regression(&x, &y);
+
+        assert_eq!(result.slope, 0.0);
+        assert!(result.predictions.is_empty());
+    }
+}
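
A usage sketch for the two entry points above (`linear_regression`, `multiple_regression`, and their result fields come from the file itself; the data and surrounding code are illustrative):

fn regression_demo() {
    // Simple OLS on a roughly linear series, y ~ 2x + 1 with noise.
    let x = [1.0, 2.0, 3.0, 4.0, 5.0];
    let y = [3.1, 4.9, 7.2, 8.8, 11.0];
    let fit = linear_regression(&x, &y);
    println!(
        "slope={:.3} intercept={:.3} r2={:.3} adj_r2={:.3} se={:.3}",
        fit.slope, fit.intercept, fit.r_squared, fit.adjusted_r_squared, fit.standard_error
    );

    // Multiple OLS: predictors are passed column-wise, one Vec per predictor;
    // the intercept column of 1s is added internally.
    let x1 = vec![1.0, 2.0, 3.0, 4.0, 5.0];
    let x2 = vec![1.0, 0.0, 1.0, 0.0, 1.0];
    let multi = multiple_regression(&y, &[x1, x2]);
    println!("coefficients={:?} intercept={:.3}", multi.coefficients, multi.intercept);
}

Note the degenerate-input convention visible in the early returns: mismatched lengths, fewer observations than coefficients, or a singular X'X all yield zeroed results with empty `predictions` rather than an error, so a caller that must distinguish "no fit" from a genuine zero slope should check `predictions.is_empty()`.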

package/src/stats/rolling.rs
ADDED
@@ -0,0 +1,193 @@
+//! Rolling Statistics Module
+//!
+//! Rolling window statistical calculations
+
+use serde::{Deserialize, Serialize};
+use crate::utils::{mean, std_dev, min, max};
+use super::correlation::pearson_correlation;
+use super::regression::linear_regression;
+
+/// Rolling statistics result
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RollingStats {
+    /// Rolling mean
+    pub mean: Vec<f64>,
+    /// Rolling standard deviation
+    pub std_dev: Vec<f64>,
+    /// Rolling minimum
+    pub min: Vec<f64>,
+    /// Rolling maximum
+    pub max: Vec<f64>,
+}
+
+/// Rolling regression result
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RollingRegressionResult {
+    /// Rolling slope
+    pub slope: Vec<f64>,
+    /// Rolling intercept
+    pub intercept: Vec<f64>,
+    /// Rolling R-squared
+    pub r_squared: Vec<f64>,
+}
+
+/// Calculate rolling statistics
+pub fn rolling_stats(data: &[f64], period: usize) -> RollingStats {
+    if data.len() < period || period == 0 {
+        return RollingStats {
+            mean: Vec::new(),
+            std_dev: Vec::new(),
+            min: Vec::new(),
+            max: Vec::new(),
+        };
+    }
+
+    let result_len = data.len() - period + 1;
+    let mut means = Vec::with_capacity(result_len);
+    let mut std_devs = Vec::with_capacity(result_len);
+    let mut mins = Vec::with_capacity(result_len);
+    let mut maxs = Vec::with_capacity(result_len);
+
+    for i in 0..result_len {
+        let slice = &data[i..i + period];
+        means.push(mean(slice));
+        std_devs.push(std_dev(slice, false));
+        mins.push(min(slice));
+        maxs.push(max(slice));
+    }
+
+    RollingStats {
+        mean: means,
+        std_dev: std_devs,
+        min: mins,
+        max: maxs,
+    }
+}
+
+/// Calculate rolling correlation between two series
+pub fn rolling_correlation(x: &[f64], y: &[f64], period: usize) -> Vec<f64> {
+    if x.len() != y.len() || x.len() < period || period == 0 {
+        return Vec::new();
+    }
+
+    let result_len = x.len() - period + 1;
+    let mut result = Vec::with_capacity(result_len);
+
+    for i in 0..result_len {
+        let x_slice = &x[i..i + period];
+        let y_slice = &y[i..i + period];
+        result.push(pearson_correlation(x_slice, y_slice));
+    }
+
+    result
+}
+
+/// Calculate rolling regression
+pub fn rolling_regression(x: &[f64], y: &[f64], period: usize) -> RollingRegressionResult {
+    if x.len() != y.len() || x.len() < period || period == 0 {
+        return RollingRegressionResult {
+            slope: Vec::new(),
+            intercept: Vec::new(),
+            r_squared: Vec::new(),
+        };
+    }
+
+    let result_len = x.len() - period + 1;
+    let mut slopes = Vec::with_capacity(result_len);
+    let mut intercepts = Vec::with_capacity(result_len);
+    let mut r_squareds = Vec::with_capacity(result_len);
+
+    for i in 0..result_len {
+        let x_slice = &x[i..i + period];
+        let y_slice = &y[i..i + period];
+        let reg = linear_regression(x_slice, y_slice);
+
+        slopes.push(reg.slope);
+        intercepts.push(reg.intercept);
+        r_squareds.push(reg.r_squared);
+    }
+
+    RollingRegressionResult {
+        slope: slopes,
+        intercept: intercepts,
+        r_squared: r_squareds,
+    }
+}
+
+/// Calculate rolling z-score
+pub fn rolling_z_score(data: &[f64], period: usize) -> Vec<f64> {
+    if data.len() < period || period == 0 {
+        return Vec::new();
+    }
+
+    let result_len = data.len() - period + 1;
+    let mut result = Vec::with_capacity(result_len);
+
+    for i in 0..result_len {
+        let slice = &data[i..i + period];
+        let avg = mean(slice);
+        let sd = std_dev(slice, false);
+
+        // Z-score of the last element in the window
+        let z = if sd > 0.0 {
+            (slice[period - 1] - avg) / sd
+        } else {
+            0.0
+        };
+        result.push(z);
+    }
+
+    result
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_rolling_stats() {
+        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
+        let stats = rolling_stats(&data, 3);
+
+        assert_eq!(stats.mean.len(), 3);
+        assert!((stats.mean[0] - 2.0).abs() < 1e-10);
+        assert!((stats.mean[1] - 3.0).abs() < 1e-10);
+        assert!((stats.mean[2] - 4.0).abs() < 1e-10);
+    }
+
+    #[test]
+    fn test_rolling_correlation() {
+        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
+        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
+        let corr = rolling_correlation(&x, &y, 3);
+
+        assert_eq!(corr.len(), 3);
+        // All windows should have perfect correlation
+        for &c in &corr {
+            assert!((c - 1.0).abs() < 1e-10);
+        }
+    }
+
+    #[test]
+    fn test_rolling_regression() {
+        let x = [1.0, 2.0, 3.0, 4.0, 5.0];
+        let y = [2.0, 4.0, 6.0, 8.0, 10.0];
+        let reg = rolling_regression(&x, &y, 3);
+
+        assert_eq!(reg.slope.len(), 3);
+        // All slopes should be 2
+        for &s in &reg.slope {
+            assert!((s - 2.0).abs() < 1e-10);
+        }
+    }
+
+    #[test]
+    fn test_rolling_z_score() {
+        let data = [1.0, 2.0, 3.0, 4.0, 5.0];
+        let z = rolling_z_score(&data, 3);
+
+        assert_eq!(z.len(), 3);
+        // Last window [3, 4, 5], mean=4, std_dev=1, last element z = (5-4)/1 = 1
+        assert!((z[2] - 1.0).abs() < 1e-10);
+    }
+}
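
One alignment convention worth calling out: every rolling function above returns `len - period + 1` values, where output index `i` describes the window `data[i..i + period]`, and `rolling_z_score` scores the last element of each window against that window's own mean and standard deviation (the expected z of 1.0 in `test_rolling_z_score` implies the `false` flag of `std_dev` selects the sample, n-1, estimator). A small sketch, with the price data and printing purely illustrative:

fn z_score_demo() {
    let px = [100.0, 101.0, 103.0, 102.0, 99.0, 98.0, 101.0];
    let period = 3;

    // z[i] scores px[i + period - 1] against the mean and standard deviation
    // of the window px[i..i + period].
    let z = rolling_z_score(&px, period);
    assert_eq!(z.len(), px.len() - period + 1);

    for (i, zi) in z.iter().enumerate() {
        let idx = i + period - 1; // input index the i-th z-score refers to
        println!("px[{}] = {:>6.1}  z = {:+.2}", idx, px[idx], zi);
    }
}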