philiprehberger-math_kit 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -0
- data/README.md +118 -25
- data/lib/philiprehberger/math_kit/regression.rb +59 -0
- data/lib/philiprehberger/math_kit/stats.rb +227 -0
- data/lib/philiprehberger/math_kit/version.rb +1 -1
- data/lib/philiprehberger/math_kit.rb +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 96f9c86935c3a083d2e2a7c7f44f58faf74e8d0bef607c352a82eddfd722b9d8
|
|
4
|
+
data.tar.gz: 29c3c539be7b4c98b0cacaf6778dd47e3b99ebf0ee8bebd1ffa1aecfcc75b724
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b511e326737819a0edc160c9bdf9c11dbb940d1dcc0996f4371442d6ec77d9674fcbea55e134d206ab5289858072071f55aeb27eab180bfdcd421335d2d97f75
|
|
7
|
+
data.tar.gz: b7f1196c85d273cae6a4e0f8bb728c95eecf2e6ced1fae380b9a836cd0078aece475aebe36f9ad8627b428c117c0a6246cee522412607326517bce77455e654f
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,40 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.2.1] - 2026-03-31
|
|
11
|
+
|
|
12
|
+
### Changed
|
|
13
|
+
- Standardize README badges, support section, and license format
|
|
14
|
+
|
|
15
|
+
## [0.2.0] - 2026-03-27
|
|
16
|
+
|
|
17
|
+
### Added
|
|
18
|
+
- Skewness (Fisher-Pearson sample skewness)
|
|
19
|
+
- Kurtosis (excess kurtosis, Fisher definition)
|
|
20
|
+
- Confidence intervals for the mean (90%, 95%, 99% levels with t-distribution)
|
|
21
|
+
- Linear regression with slope, intercept, r-squared, and prediction
|
|
22
|
+
- Pearson correlation coefficient
|
|
23
|
+
- Sample covariance
|
|
24
|
+
- Min-max normalization (normalize to 0..1)
|
|
25
|
+
- Z-score standardization (mean=0, stddev=1)
|
|
26
|
+
- Median absolute deviation (MAD)
|
|
27
|
+
- Trimmed mean with configurable trim fraction
|
|
28
|
+
- Winsorized mean with configurable trim fraction
|
|
29
|
+
- `Regression` module with `Result` data class and `predict` method
|
|
30
|
+
|
|
31
|
+
## [0.1.2] - 2026-03-26
|
|
32
|
+
|
|
33
|
+
### Changed
|
|
34
|
+
- Add Sponsor badge to README
|
|
35
|
+
- Fix License section format
|
|
36
|
+
- Sync gemspec summary with README
|
|
37
|
+
|
|
38
|
+
## [0.1.1] - 2026-03-26
|
|
39
|
+
|
|
40
|
+
### Changed
|
|
41
|
+
|
|
42
|
+
- Fix README compliance (one-liner, license link)
|
|
43
|
+
|
|
10
44
|
## [0.1.0] - 2026-03-26
|
|
11
45
|
|
|
12
46
|
### Added
|
data/README.md
CHANGED
|
@@ -2,10 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
[](https://github.com/philiprehberger/rb-math-kit/actions/workflows/ci.yml)
|
|
4
4
|
[](https://rubygems.org/gems/philiprehberger-math_kit)
|
|
5
|
-
[](https://github.com/sponsors/philiprehberger)
|
|
5
|
+
[](https://github.com/philiprehberger/rb-math-kit/commits/main)
|
|
7
6
|
|
|
8
|
-
|
|
7
|
+
Statistics, regression, interpolation, rounding modes, and moving averages for Ruby
|
|
9
8
|
|
|
10
9
|
## Requirements
|
|
11
10
|
|
|
@@ -44,11 +43,51 @@ Philiprehberger::MathKit::Stats.sum([1, 2, 3]) # => 6
|
|
|
44
43
|
Philiprehberger::MathKit::Stats.range([1, 5, 3, 9, 2]) # => 8
|
|
45
44
|
```
|
|
46
45
|
|
|
47
|
-
|
|
46
|
+
### Skewness and Kurtosis
|
|
48
47
|
|
|
49
48
|
```ruby
|
|
50
|
-
Philiprehberger::MathKit::Stats.
|
|
51
|
-
Philiprehberger::MathKit::Stats.
|
|
49
|
+
Philiprehberger::MathKit::Stats.skewness([1, 1, 1, 2, 5, 10]) # => positive (right-skewed)
|
|
50
|
+
Philiprehberger::MathKit::Stats.kurtosis([2, 3, 4, 5, 6, 7]) # => -1.2 (flatter than normal; a normal sample gives ~0)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Confidence Intervals
|
|
54
|
+
|
|
55
|
+
```ruby
|
|
56
|
+
data = [10, 12, 14, 16, 18]
|
|
57
|
+
lower, upper = Philiprehberger::MathKit::Stats.confidence_interval(data, level: 0.95)
|
|
58
|
+
# => [10.07, 17.93] (approximate)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Correlation and Covariance
|
|
62
|
+
|
|
63
|
+
```ruby
|
|
64
|
+
Philiprehberger::MathKit::Stats.correlation([1, 2, 3, 4], [2, 4, 6, 8]) # => 1.0
|
|
65
|
+
Philiprehberger::MathKit::Stats.covariance([1, 2, 3, 4], [2, 4, 6, 8]) # => 3.333...
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Data Normalization
|
|
69
|
+
|
|
70
|
+
```ruby
|
|
71
|
+
Philiprehberger::MathKit::Stats.normalize([10, 20, 30]) # => [0.0, 0.5, 1.0]
|
|
72
|
+
Philiprehberger::MathKit::Stats.standardize([10, 20, 30, 40]) # => z-scores (mean=0, stddev=1)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Robust Statistics
|
|
76
|
+
|
|
77
|
+
```ruby
|
|
78
|
+
Philiprehberger::MathKit::Stats.median_absolute_deviation([1, 1, 2, 2, 4, 6, 9]) # => 1.0
|
|
79
|
+
Philiprehberger::MathKit::Stats.trimmed_mean([1, 2, 3, 4, 100], trim: 0.2) # => 3.0
|
|
80
|
+
Philiprehberger::MathKit::Stats.winsorized_mean([1, 2, 3, 4, 100], trim: 0.2) # => less affected by outlier
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Linear Regression
|
|
84
|
+
|
|
85
|
+
```ruby
|
|
86
|
+
result = Philiprehberger::MathKit::Regression.linear([1, 2, 3, 4], [2, 4, 6, 8])
|
|
87
|
+
result.slope # => 2.0
|
|
88
|
+
result.intercept # => 0.0
|
|
89
|
+
result.r_squared # => 1.0
|
|
90
|
+
result.predict(5) # => 10.0
|
|
52
91
|
```
|
|
53
92
|
|
|
54
93
|
### Interpolation
|
|
@@ -67,7 +106,6 @@ Philiprehberger::MathKit::Round.bankers(3.5) # => 4.0
|
|
|
67
106
|
Philiprehberger::MathKit::Round.ceiling(2.1) # => 3.0
|
|
68
107
|
Philiprehberger::MathKit::Round.floor(2.9) # => 2.0
|
|
69
108
|
Philiprehberger::MathKit::Round.truncate(2.9) # => 2.0
|
|
70
|
-
Philiprehberger::MathKit::Round.truncate(-2.9) # => -2.0
|
|
71
109
|
Philiprehberger::MathKit::Round.bankers(2.55, precision: 1) # => 2.6
|
|
72
110
|
```
|
|
73
111
|
|
|
@@ -80,32 +118,87 @@ Philiprehberger::MathKit::MovingAverage.exponential([1, 2, 3, 4, 5], alpha: 0.5)
|
|
|
80
118
|
|
|
81
119
|
## API
|
|
82
120
|
|
|
121
|
+
### `Stats`
|
|
122
|
+
|
|
83
123
|
| Method | Description |
|
|
84
124
|
|--------|-------------|
|
|
85
|
-
|
|
|
86
|
-
|
|
|
87
|
-
|
|
|
88
|
-
|
|
|
89
|
-
|
|
|
90
|
-
|
|
|
91
|
-
|
|
|
92
|
-
|
|
|
93
|
-
|
|
|
94
|
-
|
|
|
95
|
-
|
|
|
96
|
-
|
|
|
97
|
-
|
|
|
98
|
-
|
|
|
99
|
-
|
|
|
125
|
+
| `.mean(values)` | Arithmetic mean |
|
|
126
|
+
| `.median(values)` | Median (middle value or average of two middle) |
|
|
127
|
+
| `.mode(values)` | Mode(s) as array |
|
|
128
|
+
| `.variance(values, population: true)` | Population or sample variance |
|
|
129
|
+
| `.stddev(values, population: true)` | Standard deviation |
|
|
130
|
+
| `.percentile(values, p)` | Percentile (0-100) with linear interpolation |
|
|
131
|
+
| `.sum(values)` | Sum of values |
|
|
132
|
+
| `.range(values)` | Max - min |
|
|
133
|
+
| `.skewness(values)` | Sample skewness (Fisher-Pearson) |
|
|
134
|
+
| `.kurtosis(values)` | Sample excess kurtosis (Fisher definition) |
|
|
135
|
+
| `.confidence_interval(values, level: 0.95)` | Confidence interval for the mean |
|
|
136
|
+
| `.correlation(xs, ys)` | Pearson correlation coefficient |
|
|
137
|
+
| `.covariance(xs, ys)` | Sample covariance |
|
|
138
|
+
| `.normalize(values)` | Min-max normalization to 0..1 |
|
|
139
|
+
| `.standardize(values)` | Z-score standardization (mean=0, stddev=1) |
|
|
140
|
+
| `.median_absolute_deviation(values)` | Median absolute deviation |
|
|
141
|
+
| `.trimmed_mean(values, trim: 0.1)` | Trimmed mean (remove fraction from each end) |
|
|
142
|
+
| `.winsorized_mean(values, trim: 0.1)` | Winsorized mean (clamp extremes) |
|
|
143
|
+
|
|
144
|
+
### `Regression`
|
|
145
|
+
|
|
146
|
+
| Method | Description |
|
|
147
|
+
|--------|-------------|
|
|
148
|
+
| `.linear(xs, ys)` | Ordinary least squares linear regression |
|
|
149
|
+
| `Result#slope` | Slope of the fitted line |
|
|
150
|
+
| `Result#intercept` | Y-intercept of the fitted line |
|
|
151
|
+
| `Result#r_squared` | Coefficient of determination (0 to 1) |
|
|
152
|
+
| `Result#predict(x)` | Predict y for a given x |
|
|
153
|
+
|
|
154
|
+
### `Interpolation`
|
|
155
|
+
|
|
156
|
+
| Method | Description |
|
|
157
|
+
|--------|-------------|
|
|
158
|
+
| `.linear(points, x)` | Linear interpolation between sorted points |
|
|
159
|
+
|
|
160
|
+
### `Round`
|
|
161
|
+
|
|
162
|
+
| Method | Description |
|
|
163
|
+
|--------|-------------|
|
|
164
|
+
| `.bankers(value, precision: 0)` | Banker's rounding (round half to even) |
|
|
165
|
+
| `.ceiling(value, precision: 0)` | Round up |
|
|
166
|
+
| `.floor(value, precision: 0)` | Round down |
|
|
167
|
+
| `.truncate(value, precision: 0)` | Truncate toward zero |
|
|
168
|
+
|
|
169
|
+
### `MovingAverage`
|
|
170
|
+
|
|
171
|
+
| Method | Description |
|
|
172
|
+
|--------|-------------|
|
|
173
|
+
| `.simple(values, window:)` | Simple moving average |
|
|
174
|
+
| `.exponential(values, alpha:)` | Exponential moving average |
|
|
100
175
|
|
|
101
176
|
## Development
|
|
102
177
|
|
|
103
178
|
```bash
|
|
104
179
|
bundle install
|
|
105
|
-
bundle exec rspec
|
|
106
|
-
bundle exec rubocop
|
|
180
|
+
bundle exec rspec
|
|
181
|
+
bundle exec rubocop
|
|
107
182
|
```
|
|
108
183
|
|
|
184
|
+
## Support
|
|
185
|
+
|
|
186
|
+
If you find this project useful:
|
|
187
|
+
|
|
188
|
+
⭐ [Star the repo](https://github.com/philiprehberger/rb-math-kit)
|
|
189
|
+
|
|
190
|
+
🐛 [Report issues](https://github.com/philiprehberger/rb-math-kit/issues?q=is%3Aissue+is%3Aopen+label%3Abug)
|
|
191
|
+
|
|
192
|
+
💡 [Suggest features](https://github.com/philiprehberger/rb-math-kit/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement)
|
|
193
|
+
|
|
194
|
+
❤️ [Sponsor development](https://github.com/sponsors/philiprehberger)
|
|
195
|
+
|
|
196
|
+
🌐 [All Open Source Projects](https://philiprehberger.com/open-source-packages)
|
|
197
|
+
|
|
198
|
+
💻 [GitHub Profile](https://github.com/philiprehberger)
|
|
199
|
+
|
|
200
|
+
🔗 [LinkedIn Profile](https://www.linkedin.com/in/philiprehberger)
|
|
201
|
+
|
|
109
202
|
## License
|
|
110
203
|
|
|
111
|
-
MIT
|
|
204
|
+
[MIT](LICENSE)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Philiprehberger
  module MathKit
    # Ordinary least squares (OLS) linear regression
    module Regression
      # Result of a linear regression: the fitted line is
      # y = intercept + slope * x, and r_squared is its coefficient of
      # determination.
      Result = Struct.new(:slope, :intercept, :r_squared, keyword_init: true) do
        # Predict the y value for a given x
        #
        # @param x [Numeric] the input value
        # @return [Float] the predicted y value
        def predict(x)
          intercept + (slope * x)
        end
      end

      class << self
        # Perform ordinary least squares linear regression
        #
        # The fit is computed from mean-centred sums,
        # slope = sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x)^2),
        # rather than from raw sums of x^2 and x*y. The raw-sum form subtracts
        # two nearly equal large numbers when the x values are large compared
        # with their spread and loses most of its significant digits.
        #
        # @param xs [Array<Numeric>] independent variable values
        # @param ys [Array<Numeric>] dependent variable values
        # @return [Result] regression result with slope, intercept, and r_squared
        # @raise [ArgumentError] if arrays differ in size, have fewer than 2 points,
        #   or all x values are identical
        def linear(xs, ys)
          raise ArgumentError, 'datasets must have the same size' if xs.size != ys.size
          raise ArgumentError, 'linear regression requires at least 2 data points' if xs.size < 2

          n = xs.size.to_f
          mean_x = xs.sum.to_f / n
          mean_y = ys.sum.to_f / n

          sxx = xs.sum { |x| (x - mean_x)**2 }
          # min == max detects identical x values exactly; sxx.zero? additionally
          # covers spreads so small that their squares underflow to zero.
          degenerate = xs.min == xs.max || sxx.zero?
          raise ArgumentError, 'all x values are identical — cannot fit a line' if degenerate

          sxy = xs.zip(ys).sum { |x, y| (x - mean_x) * (y - mean_y) }
          slope = sxy / sxx
          intercept = mean_y - (slope * mean_x)

          r_squared = compute_r_squared(ys, xs, slope, intercept)

          Result.new(slope: slope, intercept: intercept, r_squared: r_squared)
        end

        private

        # Coefficient of determination, 1 - SS_res / SS_tot, for the fitted line.
        # Returns 1.0 when every y is identical (SS_tot is zero), because the
        # ratio is undefined in that case.
        def compute_r_squared(ys, xs, slope, intercept)
          mean_y = ys.sum.to_f / ys.size
          ss_tot = ys.sum { |y| (y - mean_y)**2 }
          return 1.0 if ss_tot.zero?

          ss_res = xs.zip(ys).sum { |x, y| (y - (intercept + (slope * x)))**2 }
          1.0 - (ss_res / ss_tot)
        end
      end
    end
  end
end
|
|
@@ -115,6 +115,233 @@ module Philiprehberger
|
|
|
115
115
|
|
|
116
116
|
values.max - values.min
|
|
117
117
|
end
|
|
118
|
+
|
|
119
|
+
# Sample skewness (adjusted Fisher-Pearson coefficient, G1)
#
# Computes G1 = n / ((n - 1) * (n - 2)) * sum(((x - mean) / s)^3), where s is
# the sample standard deviation. A positive result indicates a longer right
# tail, a negative result a longer left tail.
#
# @param values [Array<Numeric>] the input values
# @return [Float] the sample skewness (0.0 when all values are identical)
# @raise [ArgumentError] if fewer than 3 values
def skewness(values)
  n = values.size
  raise ArgumentError, 'skewness requires at least 3 values' if n < 3

  avg = mean(values)
  s = stddev(values, population: false)
  # Zero spread means every value equals the mean; skewness is reported as 0.0
  # instead of dividing by zero.
  return 0.0 if s.zero?

  cubed_z_sum = values.sum { |v| ((v - avg) / s)**3 }
  (n.to_f / ((n - 1) * (n - 2))) * cubed_z_sum
end
|
|
136
|
+
|
|
137
|
+
# Bias-corrected sample excess kurtosis (Fisher definition: normal = 0)
#
# Evaluates
#   n(n+1) / ((n-1)(n-2)(n-3)) * sum((x - mean)^4) / s^4 - 3(n-1)^2 / ((n-2)(n-3))
# where s^2 is the sample variance.
#
# @param values [Array<Numeric>] the input values
# @return [Float] the sample excess kurtosis (0.0 when the sample variance is zero)
# @raise [ArgumentError] if fewer than 4 values
def kurtosis(values)
  count = values.size
  raise ArgumentError, 'kurtosis requires at least 4 values' if count < 4

  center = mean(values)
  sample_var = variance(values, population: false)
  return 0.0 if sample_var.zero?

  # Fourth central moment divided by the squared sample variance.
  fourth_moment = values.sum { |v| (v - center)**4 } / count.to_f
  ratio = fourth_moment / (sample_var**2)

  # Small-sample scale and offset of the adjusted Fisher estimator.
  scale = (count.to_f * (count + 1)) / ((count - 1) * (count - 2) * (count - 3))
  offset = (3.0 * ((count - 1)**2)) / ((count - 2) * (count - 3))

  (scale * count * ratio) - offset
end
|
|
157
|
+
|
|
158
|
+
# Two-sided confidence interval for the mean, based on the t-distribution
#
# The interval is mean +/- t * s / sqrt(n), where s is the sample standard
# deviation and t is the critical value for n - 1 degrees of freedom.
#
# @param values [Array<Numeric>] the input values
# @param level [Float] confidence level (0.90, 0.95, or 0.99)
# @return [Array(Float, Float)] lower and upper bounds
# @raise [ArgumentError] if fewer than 2 values or unsupported level
def confidence_interval(values, level: 0.95)
  count = values.size
  raise ArgumentError, 'confidence interval requires at least 2 values' if count < 2

  # Looked up first so that an unsupported level is rejected before any statistics run.
  critical = t_critical(count - 1, level)
  center = mean(values)
  standard_error = stddev(values, population: false) / Math.sqrt(count)
  half_width = critical * standard_error

  [center - half_width, center + half_width]
end
|
|
175
|
+
|
|
176
|
+
# Pearson correlation coefficient between two datasets
#
# Computed as the sample covariance divided by the product of the two sample
# standard deviations. Floating-point rounding can push that quotient
# marginally outside -1..1 for perfectly correlated data, so the result is
# bounded to the documented range.
#
# @param xs [Array<Numeric>] first dataset
# @param ys [Array<Numeric>] second dataset
# @return [Float] the Pearson correlation coefficient (-1 to 1);
#   0.0 when either dataset has zero spread
# @raise [ArgumentError] if datasets differ in size or have fewer than 2 values
def correlation(xs, ys)
  raise ArgumentError, 'datasets must have the same size' if xs.size != ys.size
  raise ArgumentError, 'correlation requires at least 2 values' if xs.size < 2

  cov = covariance(xs, ys)
  sx = stddev(xs, population: false)
  sy = stddev(ys, population: false)
  return 0.0 if sx.zero? || sy.zero?

  r = cov / (sx * sy)
  # Plain comparisons (rather than clamp) let a NaN result pass through
  # unchanged instead of raising.
  return 1.0 if r > 1.0
  return -1.0 if r < -1.0

  r
end
|
|
193
|
+
|
|
194
|
+
# Sample covariance between two datasets
#
# Sums the products of paired deviations from each dataset's mean and divides
# by n - 1.
#
# @param xs [Array<Numeric>] first dataset
# @param ys [Array<Numeric>] second dataset
# @return [Float] the sample covariance
# @raise [ArgumentError] if datasets differ in size or have fewer than 2 values
def covariance(xs, ys)
  raise ArgumentError, 'datasets must have the same size' if xs.size != ys.size
  raise ArgumentError, 'covariance requires at least 2 values' if xs.size < 2

  x_bar = mean(xs)
  y_bar = mean(ys)
  pairs = xs.zip(ys)
  cross_deviation = pairs.sum { |x, y| (x - x_bar) * (y - y_bar) }

  cross_deviation / (xs.size - 1).to_f
end
|
|
209
|
+
|
|
210
|
+
# Min-max normalization: rescales values linearly so the minimum maps to 0.0
# and the maximum maps to 1.0
#
# @param values [Array<Numeric>] the input values
# @return [Array<Float>] normalized values (all 0.0 when every value is equal)
# @raise [ArgumentError] if values is empty
def normalize(values)
  raise ArgumentError, 'values must not be empty' if values.empty?

  floor_value = values.min.to_f
  width = values.max.to_f - floor_value

  if width.zero?
    # No spread to scale by; every element maps to 0.0.
    values.map { 0.0 }
  else
    values.map { |v| (v - floor_value) / width }
  end
end
|
|
225
|
+
|
|
226
|
+
# Z-score standardization: subtracts the mean from each value and divides by
# the sample standard deviation
#
# @param values [Array<Numeric>] the input values
# @return [Array<Float>] standardized values (all 0.0 when the spread is zero)
# @raise [ArgumentError] if fewer than 2 values
def standardize(values)
  raise ArgumentError, 'standardize requires at least 2 values' if values.size < 2

  center = mean(values)
  spread = stddev(values, population: false)

  if spread.zero?
    # Identical inputs have no spread; report zeros instead of dividing by zero.
    values.map { 0.0 }
  else
    values.map { |v| (v - center) / spread }
  end
end
|
|
240
|
+
|
|
241
|
+
# Median absolute deviation (MAD): the median of each value's absolute
# distance from the median of the data
#
# @param values [Array<Numeric>] the input values
# @return [Float] the MAD
# @raise [ArgumentError] if values is empty
def median_absolute_deviation(values)
  raise ArgumentError, 'values must not be empty' if values.empty?

  center = median(values)
  median(values.map { |v| (v - center).abs })
end
|
|
253
|
+
|
|
254
|
+
# Trimmed mean: sorts the values, drops floor(n * trim) elements from each end,
# and averages what remains
#
# @param values [Array<Numeric>] the input values
# @param trim [Float] fraction to trim from each end (0.0 to 0.5 exclusive)
# @return [Float] the trimmed mean
# @raise [ArgumentError] if values is empty or trim is out of range
def trimmed_mean(values, trim: 0.1)
  raise ArgumentError, 'values must not be empty' if values.empty?
  raise ArgumentError, 'trim must be between 0.0 and 0.5 (exclusive)' if trim.negative? || trim >= 0.5

  ordered = values.sort
  drop = (ordered.size * trim).floor

  # With nothing to drop, the whole sorted sample is averaged.
  kept = drop.zero? ? ordered : ordered[drop...(ordered.size - drop)]
  mean(kept)
end
|
|
272
|
+
|
|
273
|
+
# Winsorized mean: sorts the values, pulls the floor(n * trim) smallest and
# largest elements in to the nearest remaining value, and averages the result
#
# @param values [Array<Numeric>] the input values
# @param trim [Float] fraction to winsorize from each end (0.0 to 0.5 exclusive)
# @return [Float] the winsorized mean
# @raise [ArgumentError] if values is empty or trim is out of range
def winsorized_mean(values, trim: 0.1)
  raise ArgumentError, 'values must not be empty' if values.empty?
  raise ArgumentError, 'trim must be between 0.0 and 0.5 (exclusive)' if trim.negative? || trim >= 0.5

  ordered = values.sort
  cut = (ordered.size * trim).floor
  return mean(ordered) if cut.zero?

  # Boundary values that replace anything beyond them.
  lower_bound = ordered[cut]
  upper_bound = ordered[-(cut + 1)]

  pulled_in = ordered.map do |v|
    if v < lower_bound
      lower_bound
    elsif v > upper_bound
      upper_bound
    else
      v
    end
  end
  mean(pulled_in)
end
|
|
293
|
+
|
|
294
|
+
private
|
|
295
|
+
|
|
296
|
+
# Two-tailed t-distribution critical value for a supported confidence level
#
# Uses a lookup table covering selected degrees of freedom from 1 to 120.
# Degrees of freedom that fall between two table entries are linearly
# interpolated; above 120 the normal (z) critical value is returned.
#
# @param df [Integer] degrees of freedom
# @param level [Float] confidence level (0.90, 0.95, or 0.99)
# @return [Float] the t critical value
# @raise [ArgumentError] if level is not one of the supported levels
def t_critical(df, level)
  # Degrees of freedom shared by every column of the table below.
  dfs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30, 40, 50, 60, 80, 100, 120]
  columns = {
    0.90 => [6.314, 2.920, 2.353, 2.132, 2.015, 1.943, 1.895, 1.860, 1.833, 1.812,
             1.753, 1.725, 1.708, 1.697, 1.684, 1.676, 1.671, 1.664, 1.660, 1.658],
    0.95 => [12.706, 4.303, 3.182, 2.776, 2.571, 2.447, 2.365, 2.306, 2.262, 2.228,
             2.131, 2.086, 2.060, 2.042, 2.021, 2.009, 2.000, 1.990, 1.984, 1.980],
    0.99 => [63.657, 9.925, 5.841, 4.604, 4.032, 3.707, 3.499, 3.355, 3.250, 3.169,
             2.947, 2.845, 2.787, 2.750, 2.704, 2.678, 2.660, 2.639, 2.626, 2.617]
  }
  # Normal-distribution values used once df exceeds the table.
  z_values = { 0.90 => 1.645, 0.95 => 1.960, 0.99 => 2.576 }

  unless columns.key?(level)
    raise ArgumentError, "unsupported confidence level: #{level}. Use 0.90, 0.95, or 0.99"
  end

  return z_values[level] if df > 120

  table = dfs.zip(columns[level]).to_h
  return table[df] if table.key?(df)

  # Nearest tabulated degrees of freedom on either side of df.
  below = dfs.reverse.find { |k| k <= df }
  above = dfs.find { |k| k >= df }

  # Reached when df equals a table key numerically without being a hash match
  # (for example the Float 2.0 against the Integer key 2).
  return table[below] if below == above

  t_low = table[below]
  t_high = table[above]
  fraction = (df - below).to_f / (above - below)
  t_low + (fraction * (t_high - t_low))
end
|
|
118
345
|
end
|
|
119
346
|
end
|
|
120
347
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: philiprehberger-math_kit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1
|
|
4
|
+
version: 0.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Philip Rehberger
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-03-
|
|
11
|
+
date: 2026-03-31 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: Descriptive statistics, linear interpolation, rounding modes, and moving
|
|
14
14
|
averages. Lightweight math toolkit with zero dependencies.
|
|
@@ -24,6 +24,7 @@ files:
|
|
|
24
24
|
- lib/philiprehberger/math_kit.rb
|
|
25
25
|
- lib/philiprehberger/math_kit/interpolation.rb
|
|
26
26
|
- lib/philiprehberger/math_kit/moving_average.rb
|
|
27
|
+
- lib/philiprehberger/math_kit/regression.rb
|
|
27
28
|
- lib/philiprehberger/math_kit/round.rb
|
|
28
29
|
- lib/philiprehberger/math_kit/stats.rb
|
|
29
30
|
- lib/philiprehberger/math_kit/version.rb
|
|
@@ -54,5 +55,5 @@ requirements: []
|
|
|
54
55
|
rubygems_version: 3.5.22
|
|
55
56
|
signing_key:
|
|
56
57
|
specification_version: 4
|
|
57
|
-
summary:
|
|
58
|
+
summary: Statistics, interpolation, rounding modes, and moving averages for Ruby
|
|
58
59
|
test_files: []
|