statistics.rb 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/README.md +17 -1
- data/lib/Statistics/Bin.rb +71 -9
- data/lib/Statistics/Histogram.rb +2 -2
- data/lib/Statistics/VERSION.rb +1 -1
- data/test/Statistics/Bin_test.rb +91 -0
- data/test/Statistics/Histogram_test.rb +10 -0
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 72207b38b9be8be456975d549e38cb32563f900d4f066170da98b991be69ff54
|
|
4
|
+
data.tar.gz: 7b06af6c2f260c12d896a5f6fe06544d03d3a8fb3832b173710dad95cb843383
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f35fc1c57fa9f3e46b86d423489e3d9fa057f7cf24423ffae38dfa4ddd47d47cbfce4251e93f79c640fdaae2a413f7f898463e67d85aea53470a1384a7f4d41d
|
|
7
|
+
data.tar.gz: 78f3ce433eabecb1979ea29ae4aaf8dff05290406476f1c8ba0672f4ee54d46887b79175b60091cc30c95a0a2ee000327f8127cf32c57011214bc10ededcf62f
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,25 @@
|
|
|
1
1
|
# CHANGELOG
|
|
2
2
|
|
|
3
|
+
## [0.7.0] - 20260515
|
|
4
|
+
## + cube\_root, freedman\_diaconis, scott, tuneable\_root, sturges bin width methods
|
|
5
|
+
|
|
6
|
+
1. + Bin::METHODS, Bin::DEFAULT\_METHOD, Bin::DEFAULT\_FACTOR
|
|
7
|
+
2. + Bin.cube\_root\_width, Bin.cube\_root\_count
|
|
8
|
+
3. + Bin.tuneable\_root\_width, Bin.tuneable\_root\_count
|
|
9
|
+
4. + Bin.freedman\_diaconis\_width, Bin.freedman\_diaconis\_count
|
|
10
|
+
5. + Bin.scott\_width, Bin.scott\_count
|
|
11
|
+
6. + Bin.sturges\_width, Bin.sturges\_count
|
|
12
|
+
7. ~ Bin.width, Bin.count, Bin.boundaries: + factor: keyword argument (parameter of :tuneable\_root, defaults to DEFAULT\_FACTOR)
|
|
13
|
+
8. + Bin.validate! (empty values, unknown method, non-positive factor)
|
|
14
|
+
9. ~ Bin.data\_range: now private
|
|
15
|
+
10. ~ Bin.rb: + require\_relative for IQR and StandardDeviation
|
|
16
|
+
11. ~ Histogram#initialize: + factor: keyword argument, forwarded to Bin.width
|
|
17
|
+
12. ~ test/Statistics/Bin\_test.rb: cases for new strategies, .count parity, validation
|
|
18
|
+
13. + test/Statistics/Histogram\_test.rb: factor passthrough case
|
|
19
|
+
14. ~ README.md: Document strategies; remove from Roadmap.
|
|
20
|
+
15. ~ lib/Statistics/VERSION.rb: /0.6.1/0.7.0/
|
|
21
|
+
|
|
22
|
+
|
|
3
23
|
## [0.6.1] - 20260513
|
|
4
24
|
## Statistics::Histogram::Bin --> Statistics::Bin
|
|
5
25
|
|
|
@@ -11,6 +31,7 @@
|
|
|
11
31
|
6. ~ README.md: + Contributions section
|
|
12
32
|
7. ~ lib/Statistics/VERSION.rb: /0.6.0/0.6.1/
|
|
13
33
|
|
|
34
|
+
|
|
14
35
|
## [0.6.0] - 20260504
|
|
15
36
|
## + Percentile, StandardDeviation, IQR
|
|
16
37
|
|
data/README.md
CHANGED
|
@@ -42,6 +42,23 @@ Output:
|
|
|
42
42
|
|
|
43
43
|
By default, bin width is calculated using the square root rule: `data_range / sqrt(n)`.
|
|
44
44
|
|
|
45
|
+
Other strategies are available via the `method:` option:
|
|
46
|
+
|
|
47
|
+
```ruby
|
|
48
|
+
h = Statistics::Histogram.new(values, method: :square_root) # default
|
|
49
|
+
h = Statistics::Histogram.new(values, method: :cube_root)
|
|
50
|
+
h = Statistics::Histogram.new(values, method: :freedman_diaconis)
|
|
51
|
+
h = Statistics::Histogram.new(values, method: :scott)
|
|
52
|
+
h = Statistics::Histogram.new(values, method: :sturges)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
The `tuneable_root` method takes a `factor`, the degree of the root used in `data_range / n ** (1.0 / factor)` (default `2.0`, equivalent to `square_root`):
|
|
56
|
+
|
|
57
|
+
```ruby
|
|
58
|
+
h = Statistics::Histogram.new(values, method: :tuneable_root, factor: 4.0)
|
|
59
|
+
Statistics::Bin.width(values, method: :tuneable_root, factor: 4.0)
|
|
60
|
+
```
|
|
61
|
+
|
|
45
62
|
#### Manual bin width
|
|
46
63
|
|
|
47
64
|
```ruby
|
|
@@ -98,7 +115,6 @@ Statistics::IQR.of(values) # => 4.5
|
|
|
98
115
|
## Roadmap
|
|
99
116
|
|
|
100
117
|
- Optional per-bin value storage
|
|
101
|
-
- Additional bin width methods (Freedman-Diaconis, Scott, Sturges, cube root, tuneable root)
|
|
102
118
|
- Aligned/neat bin boundaries
|
|
103
119
|
|
|
104
120
|
## Contributing
|
data/lib/Statistics/Bin.rb
CHANGED
|
@@ -1,31 +1,43 @@
|
|
|
1
1
|
# Statistics/Bin.rb
|
|
2
2
|
# Statistics::Bin
|
|
3
3
|
|
|
4
|
+
require_relative './IQR'
|
|
5
|
+
require_relative './StandardDeviation'
|
|
6
|
+
|
|
4
7
|
module Statistics
|
|
5
8
|
class Bin
|
|
9
|
+
METHODS = [:square_root, :cube_root, :freedman_diaconis, :scott, :sturges, :tuneable_root].freeze
|
|
10
|
+
DEFAULT_METHOD = :square_root
|
|
11
|
+
DEFAULT_FACTOR = 2.0
|
|
12
|
+
|
|
6
13
|
class << self
|
|
7
|
-
def width(values, bin_width: nil, bin_count: nil, method: :square_root)
|
|
14
|
+
# Resolve the bin width for `values`.
# Precedence, as coded: an explicit bin_width; then data_range / bin_count;
# then 1.0 when the data range is zero; then the rule named by `method`.
# `factor` is only consumed by :tuneable_root and falls back to DEFAULT_FACTOR when nil.
# validate! runs first, so ArgumentError (empty values, unknown method,
# non-positive factor) is raised even when bin_width or bin_count is supplied.
def width(values, bin_width: nil, bin_count: nil, method: :square_root, factor: nil)
|
|
15
|
+
validate!(values, method: method, factor: factor)
|
|
8
16
|
if bin_width
|
|
9
17
|
bin_width
|
|
10
18
|
elsif bin_count
|
|
11
19
|
data_range(values) / bin_count.to_f
|
|
12
20
|
elsif data_range(values) == 0
|
|
13
21
|
1.0
|
|
22
|
+
elsif method == :tuneable_root
|
|
23
|
+
tuneable_root_width(values, factor || DEFAULT_FACTOR)
|
|
14
24
|
else
|
|
15
25
|
send("#{method}_width", values)
|
|
16
26
|
end
|
|
17
27
|
end
|
|
18
28
|
|
|
19
|
-
def count(values, method: :square_root)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
29
|
+
# Number of bins suggested for `values` by the rule named by `method`.
# :tuneable_root receives `factor` (DEFAULT_FACTOR when nil); every other
# method is dispatched to its private "<method>_count" helper.
# Raises ArgumentError via validate! for empty values, an unknown method,
# or a non-positive factor.
def count(values, method: :square_root, factor: nil)
|
|
30
|
+
validate!(values, method: method, factor: factor)
|
|
31
|
+
if method == :tuneable_root
|
|
32
|
+
tuneable_root_count(values, factor || DEFAULT_FACTOR)
|
|
33
|
+
else
|
|
34
|
+
send("#{method}_count", values)
|
|
35
|
+
end
|
|
25
36
|
end
|
|
26
37
|
|
|
27
|
-
def boundaries(values, bin_width: nil, bin_count: nil, method: :square_root)
|
|
28
|
-
|
|
38
|
+
# Bin boundaries for `values`: steps of the bin width starting at values.first
# and running up to values.last + width.
# Uses bin_width when given, otherwise asks `width` with the same options.
# NOTE(review): reads values.first / values.last, so presumably expects
# `values` sorted ascending — confirm for direct callers.
# Raises ArgumentError via validate! for empty values, an unknown method,
# or a non-positive factor.
def boundaries(values, bin_width: nil, bin_count: nil, method: :square_root, factor: nil)
|
|
39
|
+
validate!(values, method: method, factor: factor)
|
|
40
|
+
w = bin_width || width(values, bin_count: bin_count, method: method, factor: factor)
|
|
29
41
|
values.first.step(to: values.last + w, by: w).to_a
|
|
30
42
|
end
|
|
31
43
|
|
|
@@ -36,6 +48,10 @@ module Statistics
|
|
|
36
48
|
|
|
37
49
|
private
|
|
38
50
|
|
|
51
|
+
# Spread of the data, computed as the last element minus the first.
# NOTE(review): presumably expects `values` sorted ascending (Histogram#initialize
# sorts before calling Bin.width) — confirm for direct callers.
def data_range(values)
|
|
52
|
+
values.last - values.first
|
|
53
|
+
end
|
|
54
|
+
|
|
39
55
|
def index_for_value(value, bin_count, bottom_boundary, bin_width)
|
|
40
56
|
i = ((value - bottom_boundary) / bin_width).floor
|
|
41
57
|
i >= bin_count ? bin_count - 1 : i
|
|
@@ -48,6 +64,52 @@ module Statistics
|
|
|
48
64
|
# Bin count by the square root rule: sqrt(values.size), rounded up.
def square_root_count(values)
|
|
49
65
|
Math.sqrt(values.size).ceil
|
|
50
66
|
end
|
|
67
|
+
|
|
68
|
+
# Bin width by the cube root rule: data_range * n^(-1/3), where n is values.size.
def cube_root_width(values)
|
|
69
|
+
data_range(values) * values.size ** (-1.0 / 3)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Bin count by the cube root rule: the cube root of values.size, rounded up.
def cube_root_count(values)
|
|
73
|
+
(values.size ** (1.0 / 3)).ceil
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
# Bin width using a caller-chosen root: data_range * n^(-1/factor), where n is
# values.size. With factor 2 this is data_range / sqrt(n).
# `factor` must be non-zero; the public entry points reject non-positive
# factors through validate! before reaching here.
def tuneable_root_width(values, factor)
|
|
77
|
+
data_range(values) * values.size ** (-1.0 / factor)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Bin count using a caller-chosen root: n^(1/factor), rounded up, where n is
# values.size. With factor 2 this is the square root of n, rounded up.
def tuneable_root_count(values, factor)
|
|
81
|
+
(values.size ** (1.0 / factor)).ceil
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# Bin width by the Freedman-Diaconis rule: 2 * IQR * n^(-1/3), where n is values.size.
# NOTE(review): the result is 0 whenever IQR.of(values) returns 0, even if the
# data range is non-zero — confirm callers that divide or step by this width cope.
def freedman_diaconis_width(values)
|
|
85
|
+
2.0 * IQR.of(values) * values.size ** (-1.0 / 3)
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Bin count implied by the Freedman-Diaconis width: data_range / width, rounded up.
#
# Falls back to a single bin when the width is not positive (identical values,
# or an interquartile range of zero). In that case the division yields NaN or
# Infinity and calling `ceil` on it would raise FloatDomainError.
#
# @param values [Array<Numeric>] non-empty data
# @return [Integer] number of bins, at least 1 when the width is not positive
def freedman_diaconis_count(values)
  bin_width = freedman_diaconis_width(values)
  return 1 unless bin_width.positive?

  (data_range(values) / bin_width).ceil
end
|
|
91
|
+
|
|
92
|
+
# Bin width by Scott's rule: 3.49 * standard deviation * n^(-1/3), where n is values.size.
# NOTE(review): whether StandardDeviation.of is the sample or population form
# is not visible here — confirm against its definition.
def scott_width(values)
|
|
93
|
+
3.49 * StandardDeviation.of(values) * values.size ** (-1.0 / 3)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
# Bin count implied by Scott's width: data_range / width, rounded up.
#
# Falls back to a single bin when the width is not positive (for example when
# every value is identical). In that case the division yields NaN or Infinity
# and calling `ceil` on it would raise FloatDomainError.
#
# @param values [Array<Numeric>] non-empty data
# @return [Integer] number of bins, at least 1 when the width is not positive
def scott_count(values)
  bin_width = scott_width(values)
  return 1 unless bin_width.positive?

  (data_range(values) / bin_width).ceil
end
|
|
99
|
+
|
|
100
|
+
# Bin width by Sturges' rule: the data range split evenly across sturges_count bins.
#
# The count is converted to Float before dividing so that Integer input is not
# truncated by integer division (the same reason `width` divides by bin_count.to_f).
#
# @param values [Array<Numeric>] non-empty data; data_range reads its first and last elements
# @return [Float] the bin width for Integer or Float input
def sturges_width(values)
  data_range(values) / sturges_count(values).to_f
end
|
|
103
|
+
|
|
104
|
+
# Bin count by Sturges' rule: log2(n) rounded up, plus one, where n is values.size.
def sturges_count(values)
|
|
105
|
+
Math.log2(values.size).ceil + 1
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Guard shared by the public entry points; raises on the first failing check.
# Raises ArgumentError when:
#   - values is empty,
#   - method is not listed in METHODS,
#   - factor is given and is zero or negative (checked whichever method is selected).
# A nil factor is accepted and skips the factor check.
def validate!(values, method:, factor: nil)
|
|
109
|
+
raise ArgumentError, 'Values must not be empty' if values.empty?
|
|
110
|
+
raise ArgumentError, "Unknown method: #{method}" unless METHODS.include?(method)
|
|
111
|
+
raise ArgumentError, 'Factor must be positive' if factor && factor <= 0
|
|
112
|
+
end
|
|
51
113
|
end # class << self
|
|
52
114
|
|
|
53
115
|
attr_reader :count, :interval
|
data/lib/Statistics/Histogram.rb
CHANGED
|
@@ -25,10 +25,10 @@ module Statistics
|
|
|
25
25
|
|
|
26
26
|
private
|
|
27
27
|
|
|
28
|
-
def initialize(values, bin_width: nil, bin_count: nil, method: :square_root)
|
|
28
|
+
# Build a histogram from `values`.
# Rejects empty input, stores the values converted to Float and sorted, then
# derives the bin width (Bin.width, forwarding bin_width, bin_count, method and
# factor), the boundaries (Bin.boundaries with that width) and the bins
# (allocate_values).
# Raises ArgumentError when values is empty.
def initialize(values, bin_width: nil, bin_count: nil, method: :square_root, factor: nil)
|
|
29
29
|
raise ArgumentError, 'Values must not be empty' if values.empty?
|
|
30
30
|
@values = values.map(&:to_f).sort
|
|
31
|
-
@bin_width = Bin.width(@values, bin_width: bin_width, bin_count: bin_count, method: method)
|
|
31
|
+
@bin_width = Bin.width(@values, bin_width: bin_width, bin_count: bin_count, method: method, factor: factor)
|
|
32
32
|
@boundaries = Bin.boundaries(@values, bin_width: @bin_width)
|
|
33
33
|
@bins = allocate_values
|
|
34
34
|
end
|
data/lib/Statistics/VERSION.rb
CHANGED
data/test/Statistics/Bin_test.rb
CHANGED
|
@@ -30,6 +30,97 @@ describe Statistics::Bin do
|
|
|
30
30
|
width = Statistics::Bin.width(values)
|
|
31
31
|
_(width).must_equal 1.0
|
|
32
32
|
end
|
|
33
|
+
|
|
34
|
+
it 'calculates cube root width' do
|
|
35
|
+
values = (1..100).to_a.map(&:to_f)
|
|
36
|
+
width = Statistics::Bin.width(values, method: :cube_root)
|
|
37
|
+
expected = (values.last - values.first) * values.size ** (-1.0 / 3)
|
|
38
|
+
_(width).must_be_close_to expected
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
it 'calculates tuneable root width with custom factor' do
|
|
42
|
+
values = (1..100).to_a.map(&:to_f)
|
|
43
|
+
width = Statistics::Bin.width(values, method: :tuneable_root, factor: 4.0)
|
|
44
|
+
expected = (values.last - values.first) * values.size ** (-1.0 / 4)
|
|
45
|
+
_(width).must_be_close_to expected
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
it 'tuneable root with default factor matches square root width' do
|
|
49
|
+
values = (1..100).to_a.map(&:to_f)
|
|
50
|
+
width = Statistics::Bin.width(values, method: :tuneable_root)
|
|
51
|
+
expected = Statistics::Bin.width(values, method: :square_root)
|
|
52
|
+
_(width).must_be_close_to expected
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
it 'calculates Freedman-Diaconis width' do
|
|
56
|
+
values = (1..100).to_a.map(&:to_f)
|
|
57
|
+
width = Statistics::Bin.width(values, method: :freedman_diaconis)
|
|
58
|
+
expected = 2.0 * Statistics::IQR.of(values) * values.size ** (-1.0 / 3)
|
|
59
|
+
_(width).must_be_close_to expected
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
it 'calculates Scott width' do
|
|
63
|
+
values = (1..100).to_a.map(&:to_f)
|
|
64
|
+
width = Statistics::Bin.width(values, method: :scott)
|
|
65
|
+
expected = 3.49 * Statistics::StandardDeviation.of(values) * values.size ** (-1.0 / 3)
|
|
66
|
+
_(width).must_be_close_to expected
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
it 'calculates Sturges width' do
|
|
70
|
+
values = (1..100).to_a.map(&:to_f)
|
|
71
|
+
width = Statistics::Bin.width(values, method: :sturges)
|
|
72
|
+
expected_count = Math.log2(values.size).ceil + 1
|
|
73
|
+
expected = (values.last - values.first) / expected_count
|
|
74
|
+
_(width).must_be_close_to expected
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
it 'raises for empty values' do
|
|
78
|
+
_{Statistics::Bin.width([])}.must_raise ArgumentError
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
it 'raises for unknown method' do
|
|
82
|
+
_{Statistics::Bin.width([1.0, 2.0], method: :bogus)}.must_raise ArgumentError
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
it 'raises for non-positive factor' do
|
|
86
|
+
_{Statistics::Bin.width([1.0, 2.0], factor: 0)}.must_raise ArgumentError
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
describe '.count' do
|
|
91
|
+
it 'calculates square root count by default' do
|
|
92
|
+
values = (1..100).to_a.map(&:to_f)
|
|
93
|
+
_(Statistics::Bin.count(values)).must_equal Math.sqrt(values.size).ceil
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
it 'calculates cube root count' do
|
|
97
|
+
values = (1..100).to_a.map(&:to_f)
|
|
98
|
+
_(Statistics::Bin.count(values, method: :cube_root)).must_equal (values.size ** (1.0 / 3)).ceil
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
it 'calculates tuneable root count with custom factor' do
|
|
102
|
+
values = (1..100).to_a.map(&:to_f)
|
|
103
|
+
_(Statistics::Bin.count(values, method: :tuneable_root, factor: 4.0)).must_equal (values.size ** (1.0 / 4)).ceil
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
it 'calculates Freedman-Diaconis count' do
|
|
107
|
+
values = (1..100).to_a.map(&:to_f)
|
|
108
|
+
expected_width = 2.0 * Statistics::IQR.of(values) * values.size ** (-1.0 / 3)
|
|
109
|
+
expected = ((values.last - values.first) / expected_width).ceil
|
|
110
|
+
_(Statistics::Bin.count(values, method: :freedman_diaconis)).must_equal expected
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
it 'calculates Scott count' do
|
|
114
|
+
values = (1..100).to_a.map(&:to_f)
|
|
115
|
+
expected_width = 3.49 * Statistics::StandardDeviation.of(values) * values.size ** (-1.0 / 3)
|
|
116
|
+
expected = ((values.last - values.first) / expected_width).ceil
|
|
117
|
+
_(Statistics::Bin.count(values, method: :scott)).must_equal expected
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
it 'calculates Sturges count' do
|
|
121
|
+
values = (1..100).to_a.map(&:to_f)
|
|
122
|
+
_(Statistics::Bin.count(values, method: :sturges)).must_equal Math.log2(values.size).ceil + 1
|
|
123
|
+
end
|
|
33
124
|
end
|
|
34
125
|
|
|
35
126
|
describe '.boundaries' do
|
|
@@ -136,6 +136,16 @@ describe Statistics::Histogram do
|
|
|
136
136
|
end
|
|
137
137
|
end
|
|
138
138
|
|
|
139
|
+
describe 'with method: :tuneable_root' do
|
|
140
|
+
it 'passes factor through to Bin.width' do
|
|
141
|
+
values = (1..100).to_a
|
|
142
|
+
h_default = Statistics::Histogram.new(values, method: :tuneable_root)
|
|
143
|
+
h_custom = Statistics::Histogram.new(values, method: :tuneable_root, factor: 4.0)
|
|
144
|
+
_(h_default.bins.first.width).must_be_close_to Statistics::Bin.width(values.map(&:to_f), method: :tuneable_root)
|
|
145
|
+
_(h_custom.bins.first.width).must_be_close_to Statistics::Bin.width(values.map(&:to_f), method: :tuneable_root, factor: 4.0)
|
|
146
|
+
end
|
|
147
|
+
end
|
|
148
|
+
|
|
139
149
|
describe 'with float values' do
|
|
140
150
|
it 'handles floats correctly' do
|
|
141
151
|
values = [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]
|