statistics.rb 0.6.1 → 0.7.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9db0156fb926432eb2d48821c410a0badec96b32dacd485893b0bdf8e7b25cb5
4
- data.tar.gz: 77ac6c90e08b0017d1f66ad427ed0c5f0560ef1732fa79627bb0a2160eea292e
3
+ metadata.gz: 72207b38b9be8be456975d549e38cb32563f900d4f066170da98b991be69ff54
4
+ data.tar.gz: 7b06af6c2f260c12d896a5f6fe06544d03d3a8fb3832b173710dad95cb843383
5
5
  SHA512:
6
- metadata.gz: 43cf596e2166e2cd345528dd6aae8aaadcf78e9bfe62cfe5f28903729bca2ae60a9b142796ab6ab699da57754b27480c4b7c07dae81c37f569f244064daa05f1
7
- data.tar.gz: e0f26dd100a0741855f46b466b5c9c28a673f386c674a65feafe2cf5793f7353d4791dc54e1e4f1c95f2200724caefcbcc7901b71a830743f7cb97f653ba3692
6
+ metadata.gz: f35fc1c57fa9f3e46b86d423489e3d9fa057f7cf24423ffae38dfa4ddd47d47cbfce4251e93f79c640fdaae2a413f7f898463e67d85aea53470a1384a7f4d41d
7
+ data.tar.gz: 78f3ce433eabecb1979ea29ae4aaf8dff05290406476f1c8ba0672f4ee54d46887b79175b60091cc30c95a0a2ee000327f8127cf32c57011214bc10ededcf62f
data/CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## [0.7.0] - 20260515
4
+ ## + cube\_root, freedman\_diaconis, scott, tuneable\_root, sturges bin width methods
5
+
6
+ 1. + Bin::METHODS, Bin::DEFAULT\_METHOD, Bin::DEFAULT\_FACTOR
7
+ 2. + Bin.cube\_root\_width, Bin.cube\_root\_count
8
+ 3. + Bin.tuneable\_root\_width, Bin.tuneable\_root\_count
9
+ 4. + Bin.freedman\_diaconis\_width, Bin.freedman\_diaconis\_count
10
+ 5. + Bin.scott\_width, Bin.scott\_count
11
+ 6. + Bin.sturges\_width, Bin.sturges\_count
12
+ 7. ~ Bin.width, Bin.count, Bin.boundaries: + factor: keyword argument (parameter of :tuneable\_root, defaults to DEFAULT\_FACTOR)
13
+ 8. + Bin.validate! (empty values, unknown method, non-positive factor)
14
+ 9. ~ Bin.data\_range: now private
15
+ 10. ~ Bin.rb: + require\_relative for IQR and StandardDeviation
16
+ 11. ~ Histogram#initialize: + factor: keyword argument, forwarded to Bin.width
17
+ 12. ~ test/Statistics/Bin\_test.rb: cases for new strategies, .count parity, validation
18
+ 13. + test/Statistics/Histogram\_test.rb: factor passthrough case
19
+ 14. ~ README.md: Document strategies; remove from Roadmap.
20
+ 15. ~ lib/Statistics/VERSION.rb: /0.6.1/0.7.0/
21
+
22
+
3
23
  ## [0.6.1] - 20260513
4
24
  ## Statistics::Histogram::Bin --> Statistics::Bin
5
25
 
@@ -11,6 +31,7 @@
11
31
  6. ~ README.md: + Contributions section
12
32
  7. ~ lib/Statistics/VERSION.rb: /0.6.0/0.6.1/
13
33
 
34
+
14
35
  ## [0.6.0] - 20260504
15
36
  ## + Percentile, StandardDeviation, IQR
16
37
 
data/README.md CHANGED
@@ -42,6 +42,23 @@ Output:
42
42
 
43
43
  By default, bin width is calculated using the square root rule: `data_range / sqrt(n)`.
44
44
 
45
+ Other strategies are available via the `method:` option:
46
+
47
+ ```ruby
48
+ h = Statistics::Histogram.new(values, method: :square_root) # default
49
+ h = Statistics::Histogram.new(values, method: :cube_root)
50
+ h = Statistics::Histogram.new(values, method: :freedman_diaconis)
51
+ h = Statistics::Histogram.new(values, method: :scott)
52
+ h = Statistics::Histogram.new(values, method: :sturges)
53
+ ```
54
+
55
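+ The `tuneable_root` method takes a `factor`, the degree of the root applied to the sample size, so the width is `data_range / n ** (1.0 / factor)` (default `2.0`, equivalent to `square_root`):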
56
+
57
+ ```ruby
58
+ h = Statistics::Histogram.new(values, method: :tuneable_root, factor: 4.0)
59
+ Statistics::Bin.width(values, method: :tuneable_root, factor: 4.0)
60
+ ```
61
+
45
62
  #### Manual bin width
46
63
 
47
64
  ```ruby
@@ -98,7 +115,6 @@ Statistics::IQR.of(values) # => 4.5
98
115
  ## Roadmap
99
116
 
100
117
  - Optional per-bin value storage
101
- - Additional bin width methods (Freedman-Diaconis, Scott, Sturges, cube root, tuneable root)
102
118
  - Aligned/neat bin boundaries
103
119
 
104
120
  ## Contributing
@@ -1,31 +1,43 @@
1
1
  # Statistics/Bin.rb
2
2
  # Statistics::Bin
3
3
 
4
+ require_relative './IQR'
5
+ require_relative './StandardDeviation'
6
+
4
7
  module Statistics
5
8
  class Bin
9
+ METHODS = [:square_root, :cube_root, :freedman_diaconis, :scott, :sturges, :tuneable_root].freeze
10
+ DEFAULT_METHOD = :square_root
11
+ DEFAULT_FACTOR = 2.0
12
+
6
13
  class << self
7
- def width(values, bin_width: nil, bin_count: nil, method: :square_root)
14
+ def width(values, bin_width: nil, bin_count: nil, method: :square_root, factor: nil)
15
+ validate!(values, method: method, factor: factor)
8
16
  if bin_width
9
17
  bin_width
10
18
  elsif bin_count
11
19
  data_range(values) / bin_count.to_f
12
20
  elsif data_range(values) == 0
13
21
  1.0
22
+ elsif method == :tuneable_root
23
+ tuneable_root_width(values, factor || DEFAULT_FACTOR)
14
24
  else
15
25
  send("#{method}_width", values)
16
26
  end
17
27
  end
18
28
 
19
- def count(values, method: :square_root)
20
- send("#{method}_count", values)
21
- end
22
-
23
- def data_range(values)
24
- values.last - values.first
29
+ def count(values, method: :square_root, factor: nil)
30
+ validate!(values, method: method, factor: factor)
31
+ if method == :tuneable_root
32
+ tuneable_root_count(values, factor || DEFAULT_FACTOR)
33
+ else
34
+ send("#{method}_count", values)
35
+ end
25
36
  end
26
37
 
27
- def boundaries(values, bin_width: nil, bin_count: nil, method: :square_root)
28
- w = bin_width || width(values, bin_count: bin_count, method: method)
38
+ def boundaries(values, bin_width: nil, bin_count: nil, method: :square_root, factor: nil)
39
+ validate!(values, method: method, factor: factor)
40
+ w = bin_width || width(values, bin_count: bin_count, method: method, factor: factor)
29
41
  values.first.step(to: values.last + w, by: w).to_a
30
42
  end
31
43
 
@@ -36,6 +48,10 @@ module Statistics
36
48
 
37
49
  private
38
50
 
51
+ def data_range(values)
52
+ values.last - values.first
53
+ end
54
+
39
55
  def index_for_value(value, bin_count, bottom_boundary, bin_width)
40
56
  i = ((value - bottom_boundary) / bin_width).floor
41
57
  i >= bin_count ? bin_count - 1 : i
@@ -48,6 +64,52 @@ module Statistics
48
64
  def square_root_count(values)
49
65
  Math.sqrt(values.size).ceil
50
66
  end
67
+
68
+ def cube_root_width(values)
69
+ data_range(values) * values.size ** (-1.0 / 3)
70
+ end
71
+
72
+ def cube_root_count(values)
73
+ (values.size ** (1.0 / 3)).ceil
74
+ end
75
+
76
+ def tuneable_root_width(values, factor)
77
+ data_range(values) * values.size ** (-1.0 / factor)
78
+ end
79
+
80
+ def tuneable_root_count(values, factor)
81
+ (values.size ** (1.0 / factor)).ceil
82
+ end
83
+
84
+ def freedman_diaconis_width(values)
85
+ 2.0 * IQR.of(values) * values.size ** (-1.0 / 3)
86
+ end
87
+
88
+ def freedman_diaconis_count(values)
89
+ (data_range(values) / freedman_diaconis_width(values)).ceil
90
+ end
91
+
92
+ def scott_width(values)
93
+ 3.49 * StandardDeviation.of(values) * values.size ** (-1.0 / 3)
94
+ end
95
+
96
+ def scott_count(values)
97
+ (data_range(values) / scott_width(values)).ceil
98
+ end
99
+
100
+ def sturges_width(values)
101
+ data_range(values) / sturges_count(values)
102
+ end
103
+
104
+ def sturges_count(values)
105
+ Math.log2(values.size).ceil + 1
106
+ end
107
+
108
+ def validate!(values, method:, factor: nil)
109
+ raise ArgumentError, 'Values must not be empty' if values.empty?
110
+ raise ArgumentError, "Unknown method: #{method}" unless METHODS.include?(method)
111
+ raise ArgumentError, 'Factor must be positive' if factor && factor <= 0
112
+ end
51
113
  end # class << self
52
114
 
53
115
  attr_reader :count, :interval
@@ -25,10 +25,10 @@ module Statistics
25
25
 
26
26
  private
27
27
 
28
- def initialize(values, bin_width: nil, bin_count: nil, method: :square_root)
28
+ def initialize(values, bin_width: nil, bin_count: nil, method: :square_root, factor: nil)
29
29
  raise ArgumentError, 'Values must not be empty' if values.empty?
30
30
  @values = values.map(&:to_f).sort
31
- @bin_width = Bin.width(@values, bin_width: bin_width, bin_count: bin_count, method: method)
31
+ @bin_width = Bin.width(@values, bin_width: bin_width, bin_count: bin_count, method: method, factor: factor)
32
32
  @boundaries = Bin.boundaries(@values, bin_width: @bin_width)
33
33
  @bins = allocate_values
34
34
  end
@@ -2,5 +2,5 @@
2
2
  # Statistics::VERSION
3
3
 
4
4
  module Statistics
5
- VERSION = '0.6.1'
5
+ VERSION = '0.7.0'
6
6
  end
@@ -30,6 +30,97 @@ describe Statistics::Bin do
30
30
  width = Statistics::Bin.width(values)
31
31
  _(width).must_equal 1.0
32
32
  end
33
+
34
+ it 'calculates cube root width' do
35
+ values = (1..100).to_a.map(&:to_f)
36
+ width = Statistics::Bin.width(values, method: :cube_root)
37
+ expected = (values.last - values.first) * values.size ** (-1.0 / 3)
38
+ _(width).must_be_close_to expected
39
+ end
40
+
41
+ it 'calculates tuneable root width with custom factor' do
42
+ values = (1..100).to_a.map(&:to_f)
43
+ width = Statistics::Bin.width(values, method: :tuneable_root, factor: 4.0)
44
+ expected = (values.last - values.first) * values.size ** (-1.0 / 4)
45
+ _(width).must_be_close_to expected
46
+ end
47
+
48
+ it 'tuneable root with default factor matches square root width' do
49
+ values = (1..100).to_a.map(&:to_f)
50
+ width = Statistics::Bin.width(values, method: :tuneable_root)
51
+ expected = Statistics::Bin.width(values, method: :square_root)
52
+ _(width).must_be_close_to expected
53
+ end
54
+
55
+ it 'calculates Freedman-Diaconis width' do
56
+ values = (1..100).to_a.map(&:to_f)
57
+ width = Statistics::Bin.width(values, method: :freedman_diaconis)
58
+ expected = 2.0 * Statistics::IQR.of(values) * values.size ** (-1.0 / 3)
59
+ _(width).must_be_close_to expected
60
+ end
61
+
62
+ it 'calculates Scott width' do
63
+ values = (1..100).to_a.map(&:to_f)
64
+ width = Statistics::Bin.width(values, method: :scott)
65
+ expected = 3.49 * Statistics::StandardDeviation.of(values) * values.size ** (-1.0 / 3)
66
+ _(width).must_be_close_to expected
67
+ end
68
+
69
+ it 'calculates Sturges width' do
70
+ values = (1..100).to_a.map(&:to_f)
71
+ width = Statistics::Bin.width(values, method: :sturges)
72
+ expected_count = Math.log2(values.size).ceil + 1
73
+ expected = (values.last - values.first) / expected_count
74
+ _(width).must_be_close_to expected
75
+ end
76
+
77
+ it 'raises for empty values' do
78
+ _{Statistics::Bin.width([])}.must_raise ArgumentError
79
+ end
80
+
81
+ it 'raises for unknown method' do
82
+ _{Statistics::Bin.width([1.0, 2.0], method: :bogus)}.must_raise ArgumentError
83
+ end
84
+
85
+ it 'raises for non-positive factor' do
86
+ _{Statistics::Bin.width([1.0, 2.0], factor: 0)}.must_raise ArgumentError
87
+ end
88
+ end
89
+
90
+ describe '.count' do
91
+ it 'calculates square root count by default' do
92
+ values = (1..100).to_a.map(&:to_f)
93
+ _(Statistics::Bin.count(values)).must_equal Math.sqrt(values.size).ceil
94
+ end
95
+
96
+ it 'calculates cube root count' do
97
+ values = (1..100).to_a.map(&:to_f)
98
+ _(Statistics::Bin.count(values, method: :cube_root)).must_equal (values.size ** (1.0 / 3)).ceil
99
+ end
100
+
101
+ it 'calculates tuneable root count with custom factor' do
102
+ values = (1..100).to_a.map(&:to_f)
103
+ _(Statistics::Bin.count(values, method: :tuneable_root, factor: 4.0)).must_equal (values.size ** (1.0 / 4)).ceil
104
+ end
105
+
106
+ it 'calculates Freedman-Diaconis count' do
107
+ values = (1..100).to_a.map(&:to_f)
108
+ expected_width = 2.0 * Statistics::IQR.of(values) * values.size ** (-1.0 / 3)
109
+ expected = ((values.last - values.first) / expected_width).ceil
110
+ _(Statistics::Bin.count(values, method: :freedman_diaconis)).must_equal expected
111
+ end
112
+
113
+ it 'calculates Scott count' do
114
+ values = (1..100).to_a.map(&:to_f)
115
+ expected_width = 3.49 * Statistics::StandardDeviation.of(values) * values.size ** (-1.0 / 3)
116
+ expected = ((values.last - values.first) / expected_width).ceil
117
+ _(Statistics::Bin.count(values, method: :scott)).must_equal expected
118
+ end
119
+
120
+ it 'calculates Sturges count' do
121
+ values = (1..100).to_a.map(&:to_f)
122
+ _(Statistics::Bin.count(values, method: :sturges)).must_equal Math.log2(values.size).ceil + 1
123
+ end
33
124
  end
34
125
 
35
126
  describe '.boundaries' do
@@ -136,6 +136,16 @@ describe Statistics::Histogram do
136
136
  end
137
137
  end
138
138
 
139
+ describe 'with method: :tuneable_root' do
140
+ it 'passes factor through to Bin.width' do
141
+ values = (1..100).to_a
142
+ h_default = Statistics::Histogram.new(values, method: :tuneable_root)
143
+ h_custom = Statistics::Histogram.new(values, method: :tuneable_root, factor: 4.0)
144
+ _(h_default.bins.first.width).must_be_close_to Statistics::Bin.width(values.map(&:to_f), method: :tuneable_root)
145
+ _(h_custom.bins.first.width).must_be_close_to Statistics::Bin.width(values.map(&:to_f), method: :tuneable_root, factor: 4.0)
146
+ end
147
+ end
148
+
139
149
  describe 'with float values' do
140
150
  it 'handles floats correctly' do
141
151
  values = [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statistics.rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - thoran