philiprehberger-math_kit 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +25 -0
- data/lib/philiprehberger/math_kit/stats.rb +65 -0
- data/lib/philiprehberger/math_kit/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bfd866aacb1af07326cd0543912f1d088dec7cc9c7c06f217709a7bbc31f79d1
|
|
4
|
+
data.tar.gz: f747448a80beda06245efebbff03f3fb043c042bf625c9b8a7512debb9847e1b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6b8b029f141507b9cd791c070ae2d9f5cec3f228182a58caaaf1ad7fce42512a2c62d364e8d1a68d7b204e096590d392ebd341042ada39b50c0b35053ed06974
|
|
7
|
+
data.tar.gz: 6ec476f44f0728ef45b6ce64806be76544b39a5f4831d2897f7ea2fd9bb0f9959df4fa1f7fba7a61cd8f842f76976935b47301da3353da340f5916a96748c65e
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.3.0] - 2026-04-10
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- `Stats.describe(values)` for summary statistics (count, mean, median, min, max, stddev, variance, percentiles)
|
|
14
|
+
- `Stats.histogram(values, bins:)` for frequency distribution into equal-width bins
|
|
15
|
+
- `Stats.weighted_mean(values, weights:)` for weighted arithmetic mean
|
|
16
|
+
|
|
10
17
|
## [0.2.3] - 2026-04-08
|
|
11
18
|
|
|
12
19
|
### Changed
|
data/README.md
CHANGED
|
@@ -43,6 +43,28 @@ Philiprehberger::MathKit::Stats.sum([1, 2, 3]) # => 6
|
|
|
43
43
|
Philiprehberger::MathKit::Stats.range([1, 5, 3, 9, 2]) # => 8
|
|
44
44
|
```
|
|
45
45
|
|
|
46
|
+
### Summary Statistics
|
|
47
|
+
|
|
48
|
+
```ruby
|
|
49
|
+
Philiprehberger::MathKit::Stats.describe([1, 2, 3, 4, 5])
|
|
50
|
+
# => { count: 5, mean: 3.0, median: 3.0, min: 1.0, max: 5.0,
|
|
51
|
+
# stddev: 1.58..., variance: 2.5, p25: 2.0, p50: 3.0, p75: 4.0 }
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Weighted Mean
|
|
55
|
+
|
|
56
|
+
```ruby
|
|
57
|
+
Philiprehberger::MathKit::Stats.weighted_mean([10, 20, 30], weights: [3, 1, 1])
|
|
58
|
+
# => 16.0
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Histogram
|
|
62
|
+
|
|
63
|
+
```ruby
|
|
64
|
+
Philiprehberger::MathKit::Stats.histogram([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], bins: 5)
|
|
65
|
+
# => [{ min: 1.0, max: 2.8, count: 2 }, { min: 2.8, max: 4.6, count: 2 }, ...]
|
|
66
|
+
```
|
|
67
|
+
|
|
46
68
|
### Skewness and Kurtosis
|
|
47
69
|
|
|
48
70
|
```ruby
|
|
@@ -140,6 +162,9 @@ Philiprehberger::MathKit::MovingAverage.exponential([1, 2, 3, 4, 5], alpha: 0.5)
|
|
|
140
162
|
| `.median_absolute_deviation(values)` | Median absolute deviation |
|
|
141
163
|
| `.trimmed_mean(values, trim: 0.1)` | Trimmed mean (remove fraction from each end) |
|
|
142
164
|
| `.winsorized_mean(values, trim: 0.1)` | Winsorized mean (clamp extremes) |
|
|
165
|
+
| `.describe(values)` | Summary statistics hash (count, mean, median, min, max, stddev, variance, percentiles) |
|
|
166
|
+
| `.histogram(values, bins: 10)` | Frequency distribution as array of bin hashes |
|
|
167
|
+
| `.weighted_mean(values, weights:)` | Weighted arithmetic mean |
|
|
143
168
|
|
|
144
169
|
### `Regression`
|
|
145
170
|
|
|
@@ -291,6 +291,71 @@ module Philiprehberger
|
|
|
291
291
|
mean(winsorized)
|
|
292
292
|
end
|
|
293
293
|
|
|
294
|
+
# Summary statistics for a dataset
|
|
295
|
+
#
|
|
296
|
+
# @param values [Array<Numeric>] the input values
|
|
297
|
+
# @return [Hash] with :count, :mean, :median, :min, :max, :stddev, :variance, :p25, :p50, :p75
|
|
298
|
+
# @raise [ArgumentError] if values is empty
|
|
299
|
+
def describe(values)
  raise ArgumentError, 'values must not be empty' if values.empty?

  n = values.size

  # Built key by key so the resulting hash keeps the order
  # count, mean, median, min, max, stddev, variance, p25, p50, p75.
  summary = { count: n }
  summary[:mean] = mean(values)
  summary[:median] = median(values)
  summary[:min] = values.min.to_f
  summary[:max] = values.max.to_f

  # The sample (population: false) spread is only requested when there are
  # at least two observations; a single value reports zero spread.
  if n >= 2
    summary[:stddev] = stddev(values, population: false)
    summary[:variance] = variance(values, population: false)
  else
    summary[:stddev] = 0.0
    summary[:variance] = 0.0
  end

  # Quartiles are stored under :p25, :p50 and :p75.
  [25, 50, 75].each { |q| summary[:"p#{q}"] = percentile(values, q) }

  summary
end
|
|
315
|
+
|
|
316
|
+
# Frequency distribution (histogram)
|
|
317
|
+
#
|
|
318
|
+
# @param values [Array<Numeric>] the input values
|
|
319
|
+
# @param bins [Integer] number of bins (default: 10)
|
|
320
|
+
# @return [Array<Hash>] array of { min:, max:, count: } hashes
|
|
321
|
+
# @raise [ArgumentError] if values is empty or bins < 1
|
|
322
|
+
def histogram(values, bins: 10)
  raise ArgumentError, 'values must not be empty' if values.empty?
  raise ArgumentError, 'bins must be at least 1' if bins < 1

  # Array sizes and indices are whole numbers. Truncating once here keeps the
  # number of bins, the bin width and the index clamp below in agreement when
  # a non-integral value such as 2.5 is passed (previously the array was
  # truncated but the width was not, so the largest value indexed past the
  # end of the result).
  bin_count = bins.to_i

  min_val, max_val = values.minmax.map(&:to_f)

  # A constant data set has zero range; use unit-width bins so every value
  # falls into the first bin.
  width = max_val == min_val ? 1.0 : (max_val - min_val) / bin_count

  result = Array.new(bin_count) do |i|
    { min: min_val + (i * width), max: min_val + ((i + 1) * width), count: 0 }
  end

  last_bin = bin_count - 1
  values.each do |v|
    # width can only be zero if the range underflows; avoid dividing by it.
    idx = width.zero? ? 0 : ((v - min_val) / width).floor
    # The maximum value computes to index == bin_count; count it in the
    # final bin so the last interval is closed on the right.
    result[idx.clamp(0, last_bin)][:count] += 1
  end

  result
end
|
|
342
|
+
|
|
343
|
+
# Weighted arithmetic mean
|
|
344
|
+
#
|
|
345
|
+
# @param values [Array<Numeric>] the input values
|
|
346
|
+
# @param weights [Array<Numeric>] the corresponding weights
|
|
347
|
+
# @return [Float] the weighted mean
|
|
348
|
+
# @raise [ArgumentError] if arrays differ in size, are empty, or weights sum to zero
|
|
349
|
+
def weighted_mean(values, weights:)
  raise ArgumentError, 'values must not be empty' if values.empty?
  raise ArgumentError, 'values and weights must have the same size' unless values.size == weights.size

  # Converted to Float so the final division is never integer division.
  denominator = weights.sum.to_f
  raise ArgumentError, 'weights must not sum to zero' if denominator.zero?

  # Pair each value with its weight and add up the products.
  numerator = values.zip(weights).sum { |pair| pair.first * pair.last }
  numerator / denominator
end
|
|
358
|
+
|
|
294
359
|
private
|
|
295
360
|
|
|
296
361
|
# T-distribution critical values for common confidence levels
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: philiprehberger-math_kit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.3
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Philip Rehberger
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-
|
|
11
|
+
date: 2026-04-10 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: Descriptive statistics, linear interpolation, rounding modes, and moving
|
|
14
14
|
averages. Lightweight math toolkit with zero dependencies.
|