statistics.rb 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG +92 -0
- data/Gemfile +3 -0
- data/LICENSE +21 -0
- data/README.md +80 -0
- data/Rakefile +7 -0
- data/lib/Statistics/Histogram/Bin.rb +76 -0
- data/lib/Statistics/Histogram.rb +46 -0
- data/lib/Statistics/VERSION.rb +6 -0
- data/lib/statistics.rb +4 -0
- data/statistics.rb.gemspec +46 -0
- data/test/Statistics/Histogram/Bin_test.rb +95 -0
- data/test/Statistics/Histogram_test.rb +147 -0
- metadata +80 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: d48404847c10c54ef090f41c5dc470e3ca6409ab07918828b6181e54836bc22b
|
|
4
|
+
data.tar.gz: 42f80e662e6834081f993c168e96e75451be5a83106bbe8ebe0a2260a47f84c1
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: eca8442077f05a25a936475b44c5d6cc75b6a1f009ec1ec15e4803d4b43727c554b4036a1c3ae44e674acacb4c5c0d4605aca24783998cdd13962a86bca887f5
|
|
7
|
+
data.tar.gz: 14e9b21b4db384c2bd01bed96d7889c88949fb45a1e2c352df721d9af974ff1daf91e020bcac211b6f94639aafd3d63a16ac3d9a0d27f38f65ce87ade157cefd
|
data/CHANGELOG
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# CHANGELOG
|
|
2
|
+
|
|
3
|
+
## [0.5.0] - 20260503
|
|
4
|
+
## Bin.bin_for_value, Bin split, gem scaffolding
|
|
5
|
+
|
|
6
|
+
1. + Bin.bin_for_value (wraps index_for_value, now private)
|
|
7
|
+
2. ~ Histogram#allocate_values: Use Bin.bin_for_value
|
|
8
|
+
3. ~ Bin split out to lib/Statistics/Histogram/Bin.rb
|
|
9
|
+
4. + lib/statistics.rb
|
|
10
|
+
5. + lib/Statistics/VERSION.rb
|
|
11
|
+
6. + statistics.rb.gemspec
|
|
12
|
+
7. + Gemfile
|
|
13
|
+
8. + Rakefile
|
|
14
|
+
9. + README.md
|
|
15
|
+
10. + LICENSE
|
|
16
|
+
11. + test/Statistics/Histogram_test.rb
|
|
17
|
+
12. + test/Statistics/Histogram/Bin_test.rb
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
## [0.4.0] - 20260502
|
|
21
|
+
## Bin instances and consolidated class methods
|
|
22
|
+
|
|
23
|
+
1. + Bin instances (count-tracking via increment, attr_reader :interval)
|
|
24
|
+
2. ~ Bin.width: handles bin_width, bin_count, zero-range, and method selection
|
|
25
|
+
3. + Bin.boundaries (from 0.3.0)
|
|
26
|
+
4. + Bin.index_for_value (was Histogram#index_for_value)
|
|
27
|
+
5. + Bin.data_range
|
|
28
|
+
6. ~ class << self for class methods with private boundary
|
|
29
|
+
7. ~ attr_reader :count replaces hand-written method
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
## [0.3.0] - 20260417
|
|
33
|
+
## Hash-based bins, Bin as class-methods-only
|
|
34
|
+
|
|
35
|
+
1. + Bin.boundaries class method
|
|
36
|
+
2. ~ initialize: delegates to Bin.width and Bin.boundaries
|
|
37
|
+
3. ~ Bin: hash-based bins (no Bin instances), Bin is class-methods-only
|
|
38
|
+
4. - index_for_value (inlined back into allocate_values)
|
|
39
|
+
5. - determine_bin_width (replaced by Bin.width delegation)
|
|
40
|
+
6. - zero-range guard
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
## [0.2.0] - 20260417
|
|
44
|
+
## Count-tracking, interval, zero-range guard
|
|
45
|
+
|
|
46
|
+
1. ~ Bin: count-tracking via increment instead of storing values in array
|
|
47
|
+
2. ~ Bin: attr_reader :interval instead of :range
|
|
48
|
+
3. + Bin#empty? checks @count == 0 instead of @values.empty?
|
|
49
|
+
4. + determine_bin_width: zero-range guard
|
|
50
|
+
5. + index_for_value extracted from allocate_values
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
## [0.1.1] - 20260417
|
|
54
|
+
## Extract determine_bin_width
|
|
55
|
+
|
|
56
|
+
1. ~ initialize: extracted determine_bin_width from one-liner
|
|
57
|
+
2. /compute_boundaries/calculate_boundaries/
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
## [0.1.0] - 20260417
|
|
61
|
+
## Bin class
|
|
62
|
+
|
|
63
|
+
1. + Statistics::Histogram::Bin
|
|
64
|
+
2. ~ allocate_values: creates Bin instances instead of hash entries
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
## [0.0.0] - 20260417
|
|
68
|
+
## Statistics::Histogram
|
|
69
|
+
|
|
70
|
+
1. + lib/Statistics/Histogram.rb
|
|
71
|
+
2. - lib/BinWidth.rb
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
## 20260416
|
|
75
|
+
## BinWidth 0.1.0 to 0.2.0: Reintroduce all named strategies
|
|
76
|
+
|
|
77
|
+
1. /0.1.0/0.2.0/
|
|
78
|
+
2. Reintroduce all named strategies.
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
## 20260416
|
|
82
|
+
## BinWidth 0.0.0 to 0.1.0: Tuneable root generalisation
|
|
83
|
+
|
|
84
|
+
1. /0.0.0/0.1.0/
|
|
85
|
+
2. + tuneable_root as the general form
|
|
86
|
+
3. ~ square_root rewritten as range * n^(-1/2) form
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
## 20260416
|
|
90
|
+
## BinWidth 0.0.0
|
|
91
|
+
|
|
92
|
+
1. + lib/BinWidth.rb
|
data/Gemfile
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 thoran
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# statistics.rb
|
|
2
|
+
|
|
3
|
+
A statistics library for Ruby.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
gem install statistics.rb
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Or add to your Gemfile:
|
|
12
|
+
|
|
13
|
+
```ruby
|
|
14
|
+
gem 'statistics.rb'
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Usage
|
|
18
|
+
|
|
19
|
+
### Histogram
|
|
20
|
+
|
|
21
|
+
Create a histogram from an array of numeric values:
|
|
22
|
+
|
|
23
|
+
```ruby
|
|
24
|
+
require 'statistics.rb'
|
|
25
|
+
|
|
26
|
+
values = [1.2, 3.4, 5.6, 7.8, 2.3, 4.5, 6.7, 8.9, 3.2, 5.4]
|
|
27
|
+
|
|
28
|
+
h = Statistics::Histogram.new(values)
|
|
29
|
+
puts h
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Output:
|
|
33
|
+
|
|
34
|
+
```
|
|
35
|
+
1.20...3.63 | 4 | ****************************************
|
|
36
|
+
3.63...6.07 | 3 | ******************************
|
|
37
|
+
6.07...8.50 | 2 | ********************
|
|
38
|
+
8.50...10.94 | 1 | **********
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
#### Automatic bin width
|
|
42
|
+
|
|
43
|
+
By default, bin width is calculated using the square root rule: `data_range / sqrt(n)`.
|
|
44
|
+
|
|
45
|
+
#### Manual bin width
|
|
46
|
+
|
|
47
|
+
```ruby
|
|
48
|
+
h = Statistics::Histogram.new(values, bin_width: 2.0)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
#### Manual bin count
|
|
52
|
+
|
|
53
|
+
```ruby
|
|
54
|
+
h = Statistics::Histogram.new(values, bin_count: 5)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
#### Querying the histogram
|
|
58
|
+
|
|
59
|
+
```ruby
|
|
60
|
+
h.bins # => [#<Bin ...>, #<Bin ...>, ...]
|
|
61
|
+
h.boundaries # => [1.2, 3.63, 6.07, ...]
|
|
62
|
+
h.bin_count # => 4
|
|
63
|
+
h.mode # => #<Bin interval=1.2...3.63 count=4>
|
|
64
|
+
|
|
65
|
+
h.bins.first.interval # => 1.2...3.63
|
|
66
|
+
h.bins.first.count # => 4
|
|
67
|
+
h.bins.first.width # => 2.43
|
|
68
|
+
h.bins.first.empty? # => false
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Roadmap
|
|
72
|
+
|
|
73
|
+
- Optional per-bin value storage
|
|
74
|
+
- Additional bin width methods (Freedman-Diaconis, Scott, Sturges, cube root, tuneable root)
|
|
75
|
+
- Composable statistical primitives (Percentile, StandardDeviation, IQR)
|
|
76
|
+
- Aligned/neat bin boundaries
|
|
77
|
+
|
|
78
|
+
## License
|
|
79
|
+
|
|
80
|
+
MIT
|
data/Rakefile
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# Statistics/Histogram/Bin.rb
|
|
2
|
+
# Statistics::Histogram::Bin
|
|
3
|
+
|
|
4
|
+
module Statistics
  class Histogram
    # One bin of a histogram: an interval plus a running count of the values
    # that have been allocated to it.
    #
    # The singleton methods hold the bin arithmetic (width, boundaries and
    # value lookup). They read the extremes of the data through
    # +values.first+ and +values.last+, so callers are expected to pass a
    # non-empty collection sorted in ascending order.
    class Bin
      class << self
        # Returns the bin width to use for +values+.
        #
        # Precedence:
        # 1. an explicit +bin_width+ is returned as-is;
        # 2. zero-range data always yields 1.0, so that a usable, non-zero
        #    width comes back even when +bin_count+ is given;
        # 3. +bin_count+ divides the data range evenly;
        # 4. otherwise the private "<method>_width" strategy is used.
        #
        # Raises ArgumentError when +bin_width+ or +bin_count+ is not positive.
        def width(values, bin_width: nil, bin_count: nil, method: :square_root)
          return ensure_positive(bin_width, 'bin_width') if bin_width

          ensure_positive(bin_count, 'bin_count') if bin_count
          range = data_range(values)
          # The zero-range guard must run before the bin_count division:
          # 0.0 / bin_count is 0.0, and a zero width cannot be stepped over
          # when the boundaries are generated.
          return 1.0 if range == 0
          return range / bin_count.to_f if bin_count

          send("#{method}_width", values)
        end

        # Returns the number of bins suggested by the "<method>_count" strategy.
        def count(values, method: :square_root)
          send("#{method}_count", values)
        end

        # Difference between the last and the first element of +values+.
        def data_range(values)
          values.last - values.first
        end

        # Returns the bin boundaries: values.first, then steps of one bin
        # width up to at most values.last plus one bin width.
        #
        # The width always goes through .width, so an explicit +bin_width+ is
        # validated the same way as a computed one.
        def boundaries(values, bin_width: nil, bin_count: nil, method: :square_root)
          w = width(values, bin_width: bin_width, bin_count: bin_count, method: method)
          values.first.step(to: values.last + w, by: w).to_a
        end

        # Returns the element of +bins+ that +value+ falls into, given the
        # lowest boundary and the common bin width. Values outside the covered
        # range are clamped to the first or last bin.
        def bin_for_value(value, bins, bottom_boundary, bin_width)
          index = index_for_value(value, bins.count, bottom_boundary, bin_width)
          bins[index]
        end

        private

        # Zero-based bin index for +value+, clamped to 0..(bin_count - 1).
        def index_for_value(value, bin_count, bottom_boundary, bin_width)
          i = ((value - bottom_boundary) / bin_width).floor
          # A negative index would silently wrap around to the end of the
          # bins array, so values below the bottom boundary go to the first bin.
          return 0 if i.negative?

          i >= bin_count ? bin_count - 1 : i
        end

        # Square root rule for the width: data_range * n^(-1/2).
        def square_root_width(values)
          data_range(values) * values.size ** (-1.0 / 2)
        end

        # Square root rule for the bin count: ceil(sqrt(n)).
        def square_root_count(values)
          Math.sqrt(values.size).ceil
        end

        # Returns +number+ unchanged, or raises ArgumentError naming +label+
        # when it is not strictly greater than zero.
        def ensure_positive(number, label)
          raise ArgumentError, "#{label} must be positive" unless number > 0

          number
        end
      end # class << self

      attr_reader :count, :interval

      # Adds one to the count and returns the new count.
      def increment
        @count += 1
      end

      # Width of the interval (end minus begin).
      def width
        @interval.end - @interval.begin
      end

      # True while nothing has been counted in this bin.
      def empty?
        @count == 0
      end

      private

      # interval: an object responding to #begin and #end, e.g. a Range.
      def initialize(interval)
        @interval = interval
        @count = 0
      end
    end
  end
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Statistics/Histogram.rb
|
|
2
|
+
# Statistics::Histogram
|
|
3
|
+
|
|
4
|
+
require_relative './Histogram/Bin'
|
|
5
|
+
|
|
6
|
+
module Statistics
  # Histogram over a collection of numeric values.
  #
  # On construction the values are converted to floats and sorted, a bin
  # width is obtained from Bin.width, boundaries from Bin.boundaries, and
  # every value is counted into exactly one Bin.
  class Histogram
    attr_reader :bins, :boundaries

    # The bin holding the most values (as chosen by max_by on the counts).
    def mode
      @bins.max_by { |bin| bin.count }
    end

    # Number of bins in the histogram.
    def bin_count
      @bins.length
    end

    # Text rendering, one line per bin: interval, count, and a bar of '*'
    # scaled so that the fullest bin is 40 characters wide.
    def to_s
      tallest = @bins.map(&:count).max
      rows = @bins.map do |bin|
        stars = ((bin.count.to_f / tallest) * 40).round
        span = bin.interval
        format('%8.2f...%-8.2f | %3d | %s', span.begin, span.end, bin.count, '*' * stars)
      end
      rows.join("\n")
    end

    private

    # values: non-empty collection of objects responding to #to_f.
    # bin_width, bin_count, method: forwarded to Bin.width.
    # Raises ArgumentError when values is empty.
    def initialize(values, bin_width: nil, bin_count: nil, method: :square_root)
      raise ArgumentError, 'Values must not be empty' if values.empty?

      @values = values.map { |value| value.to_f }.sort
      @bin_width = Bin.width(@values, bin_width: bin_width, bin_count: bin_count, method: method)
      @boundaries = Bin.boundaries(@values, bin_width: @bin_width)
      @bins = allocate_values
    end

    # Builds one Bin per consecutive pair of boundaries (end-exclusive
    # ranges), counts every value into its bin, and returns the bins.
    def allocate_values
      lowest = @boundaries.first
      fresh_bins = @boundaries.each_cons(2).map { |from, to| Bin.new(from...to) }
      @values.each do |value|
        Bin.bin_for_value(value, fresh_bins, lowest, @bin_width).increment
      end
      fresh_bins
    end
  end
end
|
data/lib/statistics.rb
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
require_relative './lib/Statistics/VERSION'
|
|
2
|
+
|
|
3
|
+
# Convenience setters so a gemspec can declare its dependencies as a list.
# Each list entry is splatted into the underlying add_* call, so an entry may
# be a bare gem name or an array of name plus version requirements.
class Gem::Specification
  # Registers every entry of +gems+ via add_dependency.
  def dependencies=(gems)
    gems.each do |entry|
      add_dependency(*entry)
    end
  end

  # Registers every entry of +gems+ via add_development_dependency.
  def development_dependencies=(gems)
    gems.each do |entry|
      add_development_dependency(*entry)
    end
  end
end
|
|
12
|
+
|
|
13
|
+
# Gem specification for statistics.rb. The version comes from
# Statistics::VERSION; development dependencies are assigned through the
# development_dependencies= setter.
Gem::Specification.new do |gemspec|
  gemspec.name = 'statistics.rb'

  gemspec.version = Statistics::VERSION
  gemspec.date = '2026-05-03'

  gemspec.summary = "A statistics library for Ruby."
  gemspec.description = "A composable statistics library for Ruby. Histogram with automatic bin width calculation and composable Bin class."

  gemspec.author = 'thoran'
  gemspec.email = 'code@thoran.com'
  gemspec.homepage = 'http://github.com/thoran/statistics'
  gemspec.license = 'MIT'

  gemspec.required_ruby_version = '>= 2.7'

  # Library and test sources first, then the top-level project files.
  ruby_sources = Dir['lib/**/*.rb'] + Dir['test/**/*.rb']
  project_files = %w[CHANGELOG Gemfile LICENSE Rakefile README.md statistics.rb.gemspec]
  gemspec.files = ruby_sources + project_files

  gemspec.require_paths = ['lib']

  gemspec.development_dependencies = %w[rake minitest]
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# test/Statistics/Histogram/Bin_test.rb
|
|
2
|
+
|
|
3
|
+
require 'minitest/autorun'
|
|
4
|
+
|
|
5
|
+
require_relative '../../../lib/Statistics/Histogram/Bin'
|
|
6
|
+
|
|
7
|
+
# Specs for the Bin class methods (width, boundaries, bin_for_value) and for
# the counting behaviour of Bin instances.
describe Statistics::Histogram::Bin do
  describe '.width' do
    it 'calculates square root width by default' do
      samples = (1..100).map(&:to_f)
      span = samples.last - samples.first
      _(Statistics::Histogram::Bin.width(samples)).must_be_close_to span * samples.size ** (-1.0 / 2)
    end

    it 'returns explicit bin_width when provided' do
      samples = (1..100).map(&:to_f)
      _(Statistics::Histogram::Bin.width(samples, bin_width: 7.5)).must_equal 7.5
    end

    it 'calculates width from bin_count when provided' do
      samples = (1..100).map(&:to_f)
      _(Statistics::Histogram::Bin.width(samples, bin_count: 10)).must_be_close_to 9.9
    end

    it 'returns 1.0 for zero-range data' do
      _(Statistics::Histogram::Bin.width([5.0, 5.0, 5.0])).must_equal 1.0
    end
  end

  describe '.boundaries' do
    it 'returns an array starting at the minimum value' do
      samples = (1..10).map(&:to_f)
      edges = Statistics::Histogram::Bin.boundaries(samples, bin_width: 3.0)
      _(edges.first).must_equal 1.0
    end

    it 'extends past the maximum value' do
      samples = (1..10).map(&:to_f)
      edges = Statistics::Histogram::Bin.boundaries(samples, bin_width: 3.0)
      _(edges.last).must_be :>=, 10.0
    end
  end

  describe '.bin_for_value' do
    it 'returns the correct bin' do
      candidates = [5.0, 10.0, 15.0, 20.0].each_cons(2).map { |lo, hi| Statistics::Histogram::Bin.new(lo...hi) }
      found = Statistics::Histogram::Bin.bin_for_value(7.0, candidates, 5.0, 5.0)
      _(found.interval).must_equal(5.0...10.0)
    end

    it 'clamps to the last bin for values at the upper boundary' do
      candidates = [5.0, 10.0, 15.0].each_cons(2).map { |lo, hi| Statistics::Histogram::Bin.new(lo...hi) }
      found = Statistics::Histogram::Bin.bin_for_value(15.0, candidates, 5.0, 5.0)
      _(found.interval).must_equal(10.0...15.0)
    end
  end

  describe 'instance methods' do
    # A fresh, empty bin for every example.
    let(:bin) { Statistics::Histogram::Bin.new(1.0...5.0) }

    it 'starts with count of zero' do
      _(bin.count).must_equal 0
    end

    it 'increments count' do
      2.times { bin.increment }
      _(bin.count).must_equal 2
    end

    it 'reports width' do
      _(bin.width).must_equal 4.0
    end

    it 'is empty when count is zero' do
      _(bin.empty?).must_equal true
    end

    it 'is not empty after increment' do
      bin.increment
      _(bin.empty?).must_equal false
    end
  end
end
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# test/Statistics/Histogram_test.rb
|
|
2
|
+
|
|
3
|
+
require 'minitest/autorun'
|
|
4
|
+
|
|
5
|
+
require_relative '../../lib/Statistics/Histogram'
|
|
6
|
+
|
|
7
|
+
# Specs for Histogram construction (default width, explicit bin_width,
# explicit bin_count), its query methods, its text rendering, and the edge
# cases of identical, empty, single and float inputs.
describe Statistics::Histogram do
  describe 'with default bin width' do
    before do
      @values = (1..25).to_a
      @histogram = Statistics::Histogram.new(@values)
    end

    it 'creates bins' do
      _(@histogram.bins).wont_be_empty
    end

    it 'allocates all values' do
      total = @histogram.bins.map(&:count).sum
      _(total).must_equal @values.size
    end

    it 'uses square root rule for bin count' do
      # The boundaries may run one bin past the data, hence the tolerance of 1.
      _((@histogram.bin_count - Math.sqrt(@values.size).ceil).abs).must_be :<=, 1
    end
  end

  describe 'with explicit bin_width' do
    before do
      @values = (1..10).to_a
      @histogram = Statistics::Histogram.new(@values, bin_width: 5.0)
    end

    it 'creates bins of the specified width' do
      _(@histogram.bins.first.width.round(10)).must_equal 5.0
    end

    it 'allocates all values' do
      total = @histogram.bins.map(&:count).sum
      _(total).must_equal @values.size
    end
  end

  describe 'with explicit bin_count' do
    before do
      @values = (1..10).to_a
      @histogram = Statistics::Histogram.new(@values, bin_count: 3)
    end

    it 'creates approximately the requested number of bins' do
      _((@histogram.bin_count - 3).abs).must_be :<=, 1
    end

    it 'allocates all values' do
      total = @histogram.bins.map(&:count).sum
      _(total).must_equal @values.size
    end
  end

  describe '#mode' do
    it 'returns the bin with the highest count' do
      values = [1, 1, 1, 1, 5, 5, 10]
      histogram = Statistics::Histogram.new(values, bin_width: 3.0)
      mode_bin = histogram.mode
      _(mode_bin.interval.cover?(1)).must_equal true
      # Pin "highest count" itself, not only which interval was returned.
      _(mode_bin.count).must_equal 4
      _(mode_bin.count).must_equal histogram.bins.map(&:count).max
    end
  end

  describe '#bin_count' do
    it 'returns the number of bins' do
      values = [1, 2, 3, 4, 5]
      histogram = Statistics::Histogram.new(values, bin_count: 2)
      _(histogram.bin_count).must_be :>=, 1
      # The reported number must match the bins actually built.
      _(histogram.bin_count).must_equal histogram.bins.size
    end
  end

  describe '#boundaries' do
    before do
      @values = [5, 10, 15, 20]
      @histogram = Statistics::Histogram.new(@values, bin_width: 5.0)
    end

    it 'starts at the minimum value' do
      _(@histogram.boundaries.first).must_equal 5.0
    end

    it 'extends past the maximum value' do
      _(@histogram.boundaries.last).must_be :>=, 20.0
    end

    it 'has one more boundary than bins' do
      _(@histogram.boundaries.size).must_equal @histogram.bin_count + 1
    end
  end

  describe '#to_s' do
    before do
      @values = [1, 2, 3, 4, 5]
      @histogram = Statistics::Histogram.new(@values, bin_count: 2)
    end

    it 'returns a string' do
      _(@histogram.to_s).must_be_kind_of String
    end

    it 'contains bar characters' do
      _(@histogram.to_s).must_include '*'
    end
  end

  describe 'with identical values' do
    before do
      @values = [5, 5, 5, 5, 5]
      @histogram = Statistics::Histogram.new(@values)
    end

    it 'does not raise' do
      _(@histogram.bins).wont_be_empty
    end

    it 'puts all values in one bin' do
      _(@histogram.bins.map(&:count).sum).must_equal 5
      # Zero-range data gets width 1.0, so exactly one bin holds everything.
      _(@histogram.bin_count).must_equal 1
      _(@histogram.bins.first.count).must_equal 5
    end
  end

  describe 'with empty values' do
    it 'raises ArgumentError' do
      _{Statistics::Histogram.new([])}.must_raise ArgumentError
    end
  end

  describe 'with a single value' do
    it 'creates a histogram with one bin' do
      histogram = Statistics::Histogram.new([42])
      _(histogram.bins.map(&:count).sum).must_equal 1
      # Verify the "one bin" part of the example name as well.
      _(histogram.bin_count).must_equal 1
    end
  end

  describe 'with float values' do
    it 'handles floats correctly' do
      values = [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]
      histogram = Statistics::Histogram.new(values, bin_width: 3.0)
      total = histogram.bins.map(&:count).sum
      _(total).must_equal values.size
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: statistics.rb
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.5.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- thoran
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 2026-05-03 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: rake
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '0'
|
|
19
|
+
type: :development
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '0'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: minitest
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '0'
|
|
33
|
+
type: :development
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ">="
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '0'
|
|
40
|
+
description: A composable statistics library for Ruby. Histogram with automatic bin
|
|
41
|
+
width calculation and composable Bin class.
|
|
42
|
+
email: code@thoran.com
|
|
43
|
+
executables: []
|
|
44
|
+
extensions: []
|
|
45
|
+
extra_rdoc_files: []
|
|
46
|
+
files:
|
|
47
|
+
- CHANGELOG
|
|
48
|
+
- Gemfile
|
|
49
|
+
- LICENSE
|
|
50
|
+
- README.md
|
|
51
|
+
- Rakefile
|
|
52
|
+
- lib/Statistics/Histogram.rb
|
|
53
|
+
- lib/Statistics/Histogram/Bin.rb
|
|
54
|
+
- lib/Statistics/VERSION.rb
|
|
55
|
+
- lib/statistics.rb
|
|
56
|
+
- statistics.rb.gemspec
|
|
57
|
+
- test/Statistics/Histogram/Bin_test.rb
|
|
58
|
+
- test/Statistics/Histogram_test.rb
|
|
59
|
+
homepage: http://github.com/thoran/statistics
|
|
60
|
+
licenses:
|
|
61
|
+
- MIT
|
|
62
|
+
metadata: {}
|
|
63
|
+
rdoc_options: []
|
|
64
|
+
require_paths:
|
|
65
|
+
- lib
|
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
67
|
+
requirements:
|
|
68
|
+
- - ">="
|
|
69
|
+
- !ruby/object:Gem::Version
|
|
70
|
+
version: '2.7'
|
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - ">="
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '0'
|
|
76
|
+
requirements: []
|
|
77
|
+
rubygems_version: 4.0.10
|
|
78
|
+
specification_version: 4
|
|
79
|
+
summary: A statistics library for Ruby.
|
|
80
|
+
test_files: []
|