ministat 1.2.6 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/History.rdoc +7 -0
- data/Manifest.txt +2 -0
- data/lib/ministat.rb +2 -153
- data/lib/ministat/data.rb +192 -0
- data/test/data/3.dat +200 -0
- data/test/test_ministat.rb +98 -28
- metadata +51 -81
data/.gemtest
ADDED
File without changes
|
data/History.rdoc
CHANGED
data/Manifest.txt
CHANGED
data/lib/ministat.rb
CHANGED
@@ -1,156 +1,5 @@
|
|
1
|
-
require '
|
1
|
+
require 'ministat/data'
|
2
2
|
|
3
3
|
module MiniStat
|
4
|
-
VERSION = '1.2.6'
|
5
|
-
class Data
|
6
|
-
attr_reader :data
|
7
|
-
|
8
|
-
def initialize(data)
|
9
|
-
@data = data.collect {|data| data.to_f}.sort
|
10
|
-
@sorted = true
|
11
|
-
end
|
12
|
-
|
13
|
-
def <<(obj)
|
14
|
-
throw "#{obj.to_s} is not numeric" unless obj.to_f
|
15
|
-
@data << obj
|
16
|
-
# force computation!
|
17
|
-
@q1 = @q3 = @iqr = @outliers = @std_dev = @variance =
|
18
|
-
@mode = @harmonic_mean = @geometric_mean = nil
|
19
|
-
end
|
20
|
-
|
21
|
-
# Return the median of your dataset. Naive implementaion
|
22
|
-
# -- does a sort on the data.
|
23
|
-
def median(data=@data)
|
24
|
-
unless @sorted and data == @data
|
25
|
-
data.sort!
|
26
|
-
@sort = true
|
27
|
-
end
|
28
|
-
if data.size % 2 == 0
|
29
|
-
return (data[data.size / 2.0 - 1] + data[(data.size / 2.0)]) / 2
|
30
|
-
else
|
31
|
-
# split = (data.size - 1) / 2
|
32
|
-
# this was just weird. i need to look this up
|
33
|
-
# return (data[split - 1.5] + data[split - 0.5]) / 2
|
34
|
-
return data[(data.size - 1)/2]
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
def partition(pivot, data=@data)
|
39
|
-
low = []
|
40
|
-
high = []
|
41
|
-
data.each do |i|
|
42
|
-
high.push(i) if i > pivot
|
43
|
-
low.push(i) if i < pivot
|
44
|
-
end
|
45
|
-
return {:low => low, :high => high}
|
46
|
-
end
|
47
|
-
|
48
|
-
# First quartile.
|
49
|
-
def q1
|
50
|
-
@q1 ||= median(partition(median(@data), @data)[:low])
|
51
|
-
end
|
52
|
-
|
53
|
-
# Third quartile
|
54
|
-
def q3
|
55
|
-
@q3 ||= median(partition(median(@data), @data)[:high])
|
56
|
-
end
|
57
|
-
|
58
|
-
# Interquartile range, ie, the middle 50% of the data.
|
59
|
-
def iqr
|
60
|
-
@iqr ||= q3 - q1
|
61
|
-
end
|
62
|
-
|
63
|
-
# Returns an array of outlying data points.
|
64
|
-
def outliers
|
65
|
-
@outliers ||=
|
66
|
-
@data.collect do |i|
|
67
|
-
i if (i < q1 - (1.5 * iqr) or i > q3 + (1.5 * iqr))
|
68
|
-
end.compact
|
69
|
-
end
|
70
|
-
|
71
|
-
# Computes arthmetic mean (most common average).
|
72
|
-
def mean(data=@data)
|
73
|
-
@mean = (data.inject(0) {|i,j| i += j}) / data.size
|
74
|
-
end
|
75
|
-
|
76
|
-
# Computes mode and generates a histogram (for free!).
|
77
|
-
# (We needed it anyway).
|
78
|
-
def mode
|
79
|
-
@hist ||= {}
|
80
|
-
@max_freq ||= 0
|
81
|
-
@mode ||= nil
|
82
|
-
unless @mode
|
83
|
-
@data.each do |val|
|
84
|
-
@hist[val] ||= 0
|
85
|
-
@hist[val] += 1
|
86
|
-
@max_freq, @mode = @hist[val], val if @hist[val] > @max_freq
|
87
|
-
end
|
88
|
-
end
|
89
|
-
@mode
|
90
|
-
end
|
91
|
-
|
92
|
-
# Computes variance. Used to measure degree of spread
|
93
|
-
# in dataset.
|
94
|
-
def variance
|
95
|
-
@variance ||=
|
96
|
-
@data.inject(0) { |i,j| i += (j - mean(@data)) ** 2} / (@data.size)
|
97
|
-
end
|
98
|
-
|
99
|
-
# Standard deviation. Square root of variance, measure of the
|
100
|
-
# spread of the data about the mean.
|
101
|
-
def std_dev
|
102
|
-
@std_dev ||= Math.sqrt(variance)
|
103
|
-
end
|
104
|
-
|
105
|
-
# Geometric mean. Only applies to non-negative numbers, and
|
106
|
-
# relates to log-normal distribution.
|
107
|
-
def geometric_mean
|
108
|
-
@geoeteric_mean ||=
|
109
|
-
(@data.inject(1) {|i,j| i *= j})**(1.0/@data.size)
|
110
|
-
end
|
111
|
-
|
112
|
-
# Harmonic or subcontrary mean. Tends strongly toward the least
|
113
|
-
# elements of the dataset.
|
114
|
-
def harmonic_mean
|
115
|
-
@harmonic_mean ||=
|
116
|
-
@data.size.to_f / (@data.inject(0) {|i,j| i += (1.0/j)})
|
117
|
-
end
|
118
|
-
|
119
|
-
# Put the histogram into a string if we have it
|
120
|
-
def hist
|
121
|
-
if defined? @hist
|
122
|
-
# this is a textbook example of how to lie with statistics...
|
123
|
-
# TODO: iterate over a range rather than @hist.keys--a histogram
|
124
|
-
# produced out of the keys won't properly represent flat spots
|
125
|
-
# with no data. or something like that. do as i say, not as i do.
|
126
|
-
#
|
127
|
-
# this code borrows liberally from the ruby cookbook, recipe 5.12
|
128
|
-
# ORA, 2006
|
129
|
-
pairs = @hist.keys.collect { |x| [x.to_s, @hist[x]] }.sort
|
130
|
-
largest_key_size = pairs.max {|x,y| x[0].size <=> y[0].size }[0].size
|
131
|
-
pairs.inject("") do |s,kv|
|
132
|
-
s<< "#{kv[0].ljust(largest_key_size)} |#{char*kv[1]}\n"
|
133
|
-
end
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
# Return a string with statisical info about a dataset.
|
138
|
-
def to_s
|
139
|
-
<<-DATA_STR
|
140
|
-
Partition:#{partition(median).inspect}
|
141
|
-
Mean:#{mean}
|
142
|
-
Geometric Mean:#{geometric_mean}
|
143
|
-
Harmonic Mean:#{harmonic_mean}
|
144
|
-
Median:#{median}
|
145
|
-
Min:#{data.min}
|
146
|
-
Q1:#{q1}
|
147
|
-
Q3:#{q3}
|
148
|
-
Max:#{data.max}
|
149
|
-
IQR:#{iqr}
|
150
|
-
Outliers:#{outliers.inspect}
|
151
|
-
Variance:#{variance}
|
152
|
-
Std Dev:#{std_dev}
|
153
|
-
DATA_STR
|
154
|
-
end
|
155
|
-
end
|
4
|
+
VERSION = '1.3.0'
|
156
5
|
end
|
@@ -0,0 +1,192 @@
|
|
1
|
+
require 'mathn'
|
2
|
+
|
3
|
+
module MiniStat
|
4
|
+
class Data
|
5
|
+
include Enumerable
|
6
|
+
|
7
|
+
attr_reader :data
|
8
|
+
|
9
|
+
def initialize(d)
|
10
|
+
@data = d.map { |n| n.to_f }.sort
|
11
|
+
@sorted = true
|
12
|
+
clear_results
|
13
|
+
end
|
14
|
+
|
15
|
+
def <<(obj)
|
16
|
+
throw "#{obj.to_s} is not numeric" unless obj.to_f
|
17
|
+
@data << obj
|
18
|
+
# force computation!
|
19
|
+
clear_results
|
20
|
+
@sorted = false
|
21
|
+
obj
|
22
|
+
end
|
23
|
+
|
24
|
+
def each(&block)
|
25
|
+
@data.each(&block)
|
26
|
+
end
|
27
|
+
|
28
|
+
def clear_results
|
29
|
+
@q1, @q3, @iqr, @outliers, @std_dev, @variance = nil
|
30
|
+
@mode, @harmonic_mean, @geometric_mean = nil
|
31
|
+
end
|
32
|
+
|
33
|
+
##
|
34
|
+
# Return the median of +data+. Naive implementation
|
35
|
+
# -- does a sort on the data.
|
36
|
+
def median(data=@data)
|
37
|
+
unless @sorted and data == @data
|
38
|
+
data.sort!
|
39
|
+
@sorted = true
|
40
|
+
end
|
41
|
+
if data.size % 2 == 0
|
42
|
+
return (data[data.size / 2.0 - 1] + data[(data.size / 2.0)]) / 2.0
|
43
|
+
else
|
44
|
+
return data[(data.size - 1)/2.0]
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
##
|
49
|
+
# Partition a set of numbers about +pivot+
|
50
|
+
def partition(pivot, data=@data)
|
51
|
+
low = []
|
52
|
+
high = []
|
53
|
+
data.each do |i|
|
54
|
+
high.push(i) if i > pivot
|
55
|
+
low.push(i) if i < pivot
|
56
|
+
end
|
57
|
+
return {:low => low, :high => high}
|
58
|
+
end
|
59
|
+
|
60
|
+
##
|
61
|
+
# A note on quartiles: the below methods DO NOT produce results
|
62
|
+
# that agree with R's default approach. There is no "one true"
|
63
|
+
# method for producing quartiles--R's default is that of S, the
|
64
|
+
# code below uses another (which I find simpler). See:
|
65
|
+
# http://en.wikipedia.org/wiki/Quartile for details
|
66
|
+
|
67
|
+
##
|
68
|
+
# First quartile.
|
69
|
+
def q1
|
70
|
+
@q1 ||= median(partition(median(@data), @data)[:low])
|
71
|
+
end
|
72
|
+
|
73
|
+
##
|
74
|
+
# Third quartile
|
75
|
+
def q3
|
76
|
+
@q3 ||= median(partition(median(@data), @data)[:high])
|
77
|
+
end
|
78
|
+
|
79
|
+
##
|
80
|
+
# Interquartile range, ie, the middle 50% of the data.
|
81
|
+
def iqr
|
82
|
+
@iqr ||= q3 - q1
|
83
|
+
end
|
84
|
+
|
85
|
+
##
|
86
|
+
# Returns an array of outlying data points.
|
87
|
+
def outliers
|
88
|
+
@outliers ||=
|
89
|
+
@data.map do |i|
|
90
|
+
i if (i < q1 - (1.5 * iqr) or i > q3 + (1.5 * iqr))
|
91
|
+
end.compact
|
92
|
+
end
|
93
|
+
|
94
|
+
##
|
95
|
+
# Computes arithmetic mean (most common average).
|
96
|
+
def mean(data=@data)
|
97
|
+
@mean = (data.inject(0) {|i,j| i += j}) / data.size
|
98
|
+
end
|
99
|
+
|
100
|
+
##
|
101
|
+
# Computes mode and generates a histogram (for free!).
|
102
|
+
# (We needed it anyway).
|
103
|
+
def mode
|
104
|
+
@hist ||= {}
|
105
|
+
@max_freq ||= 0
|
106
|
+
@mode ||= nil
|
107
|
+
unless @mode
|
108
|
+
@data.each do |val|
|
109
|
+
@hist[val] ||= 0
|
110
|
+
@hist[val] += 1
|
111
|
+
@max_freq, @mode = @hist[val], val if @hist[val] > @max_freq
|
112
|
+
end
|
113
|
+
end
|
114
|
+
@mode
|
115
|
+
end
|
116
|
+
|
117
|
+
##
|
118
|
+
# Computes variance. Used to measure degree of spread
|
119
|
+
# in dataset.
|
120
|
+
def variance
|
121
|
+
@variance ||=
|
122
|
+
@data.inject(0) { |i,j| i += (j - mean(@data)) ** 2} / (@data.size - 1)
|
123
|
+
end
|
124
|
+
|
125
|
+
##
|
126
|
+
# Standard deviation. Square root of variance, measure of the
|
127
|
+
# spread of the data about the mean.
|
128
|
+
def std_dev
|
129
|
+
@std_dev ||= Math.sqrt(variance)
|
130
|
+
end
|
131
|
+
|
132
|
+
##
|
133
|
+
# Geometric mean. Only applies to non-negative numbers, and
|
134
|
+
# relates to log-normal distribution.
|
135
|
+
def geometric_mean
|
136
|
+
if @data.any? { |x| x < 0 }
|
137
|
+
raise "Geometric mean only applies to non-negative data"
|
138
|
+
end
|
139
|
+
|
140
|
+
@geometric_mean ||=
|
141
|
+
2 ** (mean @data.map { |x| Math.log2(x) })
|
142
|
+
# this overflowed for dataset with large numbers
|
143
|
+
# (@data.inject(1) {|i,j| i *= j})**(1.0/@data.size)
|
144
|
+
end
|
145
|
+
|
146
|
+
##
|
147
|
+
# Harmonic or subcontrary mean. Tends strongly toward the least
|
148
|
+
# elements of the dataset.
|
149
|
+
def harmonic_mean
|
150
|
+
@harmonic_mean ||=
|
151
|
+
@data.size.to_f / (@data.inject(0) {|i,j| i += (1.0/j)})
|
152
|
+
end
|
153
|
+
|
154
|
+
##
|
155
|
+
# Put the histogram into a string if we have it
|
156
|
+
def hist
|
157
|
+
if defined? @hist
|
158
|
+
# this is a textbook example of how to lie with statistics...
|
159
|
+
# TODO: iterate over a range rather than @hist.keys--a histogram
|
160
|
+
# produced out of the keys won't properly represent flat spots
|
161
|
+
# with no data. or something like that. do as i say, not as i do.
|
162
|
+
#
|
163
|
+
# this code borrows liberally from the ruby cookbook, recipe 5.12
|
164
|
+
# ORA, 2006
|
165
|
+
pairs = @hist.keys.map { |x| [x.to_s, @hist[x]] }.sort
|
166
|
+
largest_key_size = pairs.max { |x,y| x[0].size <=> y[0].size }[0].size
|
167
|
+
pairs.inject("") do |s,kv|
|
168
|
+
s<< "#{kv[0].ljust(largest_key_size)} |#{char*kv[1]}\n"
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
# Return a string with statistical info about a dataset.
|
174
|
+
def to_s
|
175
|
+
<<-DATA_STR
|
176
|
+
Partition:#{partition(median).inspect}
|
177
|
+
Mean:#{mean}
|
178
|
+
Geometric Mean:#{geometric_mean}
|
179
|
+
Harmonic Mean:#{harmonic_mean}
|
180
|
+
Median:#{median}
|
181
|
+
Min:#{data.min}
|
182
|
+
Q1:#{q1}
|
183
|
+
Q3:#{q3}
|
184
|
+
Max:#{data.max}
|
185
|
+
IQR:#{iqr}
|
186
|
+
Outliers:#{outliers.inspect}
|
187
|
+
Variance:#{variance}
|
188
|
+
Std Dev:#{std_dev}
|
189
|
+
DATA_STR
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|
data/test/data/3.dat
ADDED
@@ -0,0 +1,200 @@
|
|
1
|
+
5662
|
2
|
+
9159
|
3
|
+
9073
|
4
|
+
6734
|
5
|
+
4514
|
6
|
+
57
|
7
|
+
7188
|
8
|
+
2928
|
9
|
+
778
|
10
|
+
9860
|
11
|
+
3746
|
12
|
+
5998
|
13
|
+
9556
|
14
|
+
4296
|
15
|
+
6781
|
16
|
+
3507
|
17
|
+
9411
|
18
|
+
7284
|
19
|
+
1977
|
20
|
+
1805
|
21
|
+
2306
|
22
|
+
4759
|
23
|
+
7086
|
24
|
+
5881
|
25
|
+
7449
|
26
|
+
3757
|
27
|
+
2457
|
28
|
+
6703
|
29
|
+
2717
|
30
|
+
5562
|
31
|
+
7073
|
32
|
+
3073
|
33
|
+
775
|
34
|
+
3854
|
35
|
+
2509
|
36
|
+
4039
|
37
|
+
3083
|
38
|
+
3485
|
39
|
+
568
|
40
|
+
2470
|
41
|
+
8080
|
42
|
+
1252
|
43
|
+
4610
|
44
|
+
2803
|
45
|
+
1711
|
46
|
+
8435
|
47
|
+
5663
|
48
|
+
9774
|
49
|
+
4025
|
50
|
+
3494
|
51
|
+
5417
|
52
|
+
9426
|
53
|
+
2899
|
54
|
+
5857
|
55
|
+
3138
|
56
|
+
6309
|
57
|
+
806
|
58
|
+
50
|
59
|
+
1343
|
60
|
+
3830
|
61
|
+
6158
|
62
|
+
115
|
63
|
+
6471
|
64
|
+
1667
|
65
|
+
6483
|
66
|
+
5617
|
67
|
+
7975
|
68
|
+
3454
|
69
|
+
9927
|
70
|
+
5623
|
71
|
+
9245
|
72
|
+
8514
|
73
|
+
8999
|
74
|
+
205
|
75
|
+
8126
|
76
|
+
2168
|
77
|
+
5621
|
78
|
+
5223
|
79
|
+
3170
|
80
|
+
488
|
81
|
+
7466
|
82
|
+
4310
|
83
|
+
9697
|
84
|
+
8166
|
85
|
+
4740
|
86
|
+
54
|
87
|
+
1562
|
88
|
+
5630
|
89
|
+
2752
|
90
|
+
6499
|
91
|
+
5281
|
92
|
+
7040
|
93
|
+
2177
|
94
|
+
8600
|
95
|
+
1098
|
96
|
+
946
|
97
|
+
5274
|
98
|
+
1211
|
99
|
+
9808
|
100
|
+
6266
|
101
|
+
8639
|
102
|
+
1664
|
103
|
+
5376
|
104
|
+
483
|
105
|
+
9620
|
106
|
+
2365
|
107
|
+
4478
|
108
|
+
6159
|
109
|
+
6123
|
110
|
+
6491
|
111
|
+
2054
|
112
|
+
9623
|
113
|
+
2427
|
114
|
+
5710
|
115
|
+
3082
|
116
|
+
3013
|
117
|
+
9741
|
118
|
+
7880
|
119
|
+
9302
|
120
|
+
3213
|
121
|
+
22
|
122
|
+
644
|
123
|
+
9384
|
124
|
+
8401
|
125
|
+
3459
|
126
|
+
1396
|
127
|
+
5095
|
128
|
+
6079
|
129
|
+
5348
|
130
|
+
578
|
131
|
+
121
|
132
|
+
5932
|
133
|
+
3206
|
134
|
+
8480
|
135
|
+
6580
|
136
|
+
6161
|
137
|
+
3352
|
138
|
+
7017
|
139
|
+
7815
|
140
|
+
9826
|
141
|
+
6287
|
142
|
+
7483
|
143
|
+
6786
|
144
|
+
1161
|
145
|
+
599
|
146
|
+
8235
|
147
|
+
9864
|
148
|
+
5665
|
149
|
+
1381
|
150
|
+
2241
|
151
|
+
8478
|
152
|
+
4958
|
153
|
+
674
|
154
|
+
1771
|
155
|
+
7453
|
156
|
+
6345
|
157
|
+
5504
|
158
|
+
6538
|
159
|
+
5014
|
160
|
+
1300
|
161
|
+
3686
|
162
|
+
4839
|
163
|
+
7173
|
164
|
+
6402
|
165
|
+
3276
|
166
|
+
2592
|
167
|
+
8376
|
168
|
+
8602
|
169
|
+
926
|
170
|
+
273
|
171
|
+
6680
|
172
|
+
4613
|
173
|
+
5880
|
174
|
+
1420
|
175
|
+
9842
|
176
|
+
2939
|
177
|
+
2348
|
178
|
+
1308
|
179
|
+
8784
|
180
|
+
2821
|
181
|
+
7724
|
182
|
+
7030
|
183
|
+
3020
|
184
|
+
79
|
185
|
+
3876
|
186
|
+
8801
|
187
|
+
2376
|
188
|
+
3309
|
189
|
+
656
|
190
|
+
5018
|
191
|
+
4847
|
192
|
+
7152
|
193
|
+
7216
|
194
|
+
9180
|
195
|
+
2097
|
196
|
+
2149
|
197
|
+
3597
|
198
|
+
3923
|
199
|
+
5742
|
200
|
+
9623
|
data/test/test_ministat.rb
CHANGED
@@ -1,20 +1,19 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
require 'minitest/
|
3
|
-
require 'ministat'
|
2
|
+
require 'minitest/autorun'
|
3
|
+
require 'ministat'
|
4
4
|
|
5
5
|
class TestMiniStat < MiniTest::Unit::TestCase
|
6
6
|
def setup
|
7
7
|
@data1 = [34, 47, 1, 15, 57, 24, 20, 11, 19, 50, 28, 37]
|
8
8
|
@data2 = [60, 56, 61, 68, 51, 53, 69, 54]
|
9
|
-
@data3 = File.open('test/data/1.dat').map {|l| l.chomp}
|
10
|
-
@data4 = File.open('test/data/2.dat').map {|l| l.chomp}
|
9
|
+
@data3 = File.open('test/data/1.dat').map { |l| l.chomp }
|
10
|
+
@data4 = File.open('test/data/2.dat').map { |l| l.chomp }
|
11
|
+
@data5 = File.open('test/data/3.dat').map { |l| l.chomp }
|
11
12
|
@ms1 = MiniStat::Data.new(@data1)
|
12
13
|
@ms2 = MiniStat::Data.new(@data2)
|
13
14
|
@ms3 = MiniStat::Data.new(@data3)
|
14
15
|
@ms4 = MiniStat::Data.new(@data4)
|
15
|
-
|
16
|
-
# possible floating point and rounding errors
|
17
|
-
@error = 0.001
|
16
|
+
@ms5 = MiniStat::Data.new(@data5)
|
18
17
|
end
|
19
18
|
|
20
19
|
def test_enum
|
@@ -24,20 +23,44 @@ class TestMiniStat < MiniTest::Unit::TestCase
|
|
24
23
|
assert ms.mean == 1.5
|
25
24
|
end
|
26
25
|
|
26
|
+
##
|
27
|
+
# Expected test values are computed in a 3rd party statistics
|
28
|
+
# package (usually R) when possible.
|
29
|
+
#
|
30
|
+
# All the basic tests do a dummy check of adding a value to the data
|
31
|
+
# set and ensuring that the values are re-computed.
|
32
|
+
|
27
33
|
def test_iqr
|
28
|
-
|
34
|
+
assert_in_delta 25, @ms1.iqr
|
35
|
+
assert_in_delta 11, @ms2.iqr
|
36
|
+
assert_in_delta 1.94, @ms3.iqr
|
37
|
+
assert_in_delta 9.15, @ms4.iqr
|
38
|
+
assert_in_delta 4677, @ms5.iqr
|
39
|
+
|
40
|
+
@ms1 << 32
|
41
|
+
assert_in_delta 25, @ms1.iqr
|
29
42
|
end
|
30
43
|
|
31
44
|
def test_mean
|
32
|
-
|
33
|
-
|
34
|
-
|
45
|
+
assert_in_delta 28.583, @ms1.mean
|
46
|
+
assert_in_delta 59, @ms2.mean
|
47
|
+
assert_in_delta 2.179, @ms3.mean
|
48
|
+
assert_in_delta 1.878, @ms4.mean
|
49
|
+
assert_in_delta 4884.695, @ms5.mean
|
50
|
+
|
51
|
+
@ms1 << 32
|
52
|
+
assert_in_delta 28.84615, @ms1.mean
|
35
53
|
end
|
36
54
|
|
37
55
|
def test_median
|
38
|
-
|
39
|
-
|
40
|
-
|
56
|
+
assert_in_delta 26, @ms1.median
|
57
|
+
assert_in_delta 58, @ms2.median
|
58
|
+
assert_in_delta 1.735, @ms3.median
|
59
|
+
assert_in_delta 2.8, @ms4.median
|
60
|
+
assert_in_delta 5016, @ms5.median
|
61
|
+
|
62
|
+
@ms1 << 32
|
63
|
+
assert_in_delta 28, @ms1.median
|
41
64
|
end
|
42
65
|
|
43
66
|
def test_mode
|
@@ -48,34 +71,81 @@ class TestMiniStat < MiniTest::Unit::TestCase
|
|
48
71
|
end
|
49
72
|
|
50
73
|
def test_outliers
|
51
|
-
assert_equal
|
74
|
+
assert_equal @ms1.outliers, []
|
75
|
+
assert_includes @ms3.outliers, 6.0
|
76
|
+
|
77
|
+
@ms1 << 1000
|
78
|
+
assert_includes @ms1.outliers, 1000
|
52
79
|
end
|
53
80
|
|
54
81
|
def test_q1
|
55
|
-
|
56
|
-
|
57
|
-
|
82
|
+
assert_in_delta 17, @ms1.q1
|
83
|
+
assert_in_delta 53.5, @ms2.q1
|
84
|
+
assert_in_delta 1.05, @ms3.q1
|
85
|
+
assert_in_delta(-2.4, @ms4.q1)
|
86
|
+
assert_in_delta 2442, @ms5.q1
|
87
|
+
|
88
|
+
# add below our initial q1 to skew the result
|
89
|
+
@ms1 << 1
|
90
|
+
assert_in_delta 13, @ms1.q1
|
58
91
|
end
|
59
92
|
|
60
93
|
def test_q3
|
61
|
-
|
62
|
-
|
63
|
-
|
94
|
+
assert_in_delta 42, @ms1.q3
|
95
|
+
assert_in_delta 64.5, @ms2.q3
|
96
|
+
assert_in_delta 2.99, @ms3.q3
|
97
|
+
assert_in_delta 6.75, @ms4.q3
|
98
|
+
assert_in_delta 7119, @ms5.q3
|
99
|
+
|
100
|
+
# add above our initial q3 to skew the result
|
101
|
+
@ms1 << 100
|
102
|
+
assert_in_delta 48.5, @ms1.q3
|
103
|
+
end
|
104
|
+
|
105
|
+
def test_variance
|
106
|
+
assert_in_delta 286.0833, @ms1.variance
|
107
|
+
assert_in_delta 45.71429, @ms2.variance
|
108
|
+
assert_in_delta 2.297878, @ms3.variance
|
109
|
+
assert_in_delta 57.06196, @ms4.variance
|
110
|
+
assert_in_delta 8376067, @ms5.variance, 1
|
111
|
+
|
112
|
+
@ms1 << 32
|
113
|
+
assert_in_delta 263.141, @ms1.variance
|
64
114
|
end
|
65
115
|
|
66
116
|
def test_std_dev
|
67
|
-
|
68
|
-
|
69
|
-
|
117
|
+
assert_in_delta 16.914, @ms1.std_dev
|
118
|
+
assert_in_delta 6.761, @ms2.std_dev
|
119
|
+
assert_in_delta 1.515, @ms3.std_dev
|
120
|
+
assert_in_delta 7.553, @ms4.std_dev
|
121
|
+
assert_in_delta 2894.144, @ms5.std_dev
|
122
|
+
|
123
|
+
@ms1 << 32
|
124
|
+
assert_in_delta 16.22162, @ms1.std_dev
|
70
125
|
end
|
71
126
|
|
72
127
|
def test_geo_mean
|
73
|
-
|
128
|
+
assert_in_delta 58.66896, @ms2.geometric_mean
|
129
|
+
assert_in_delta 1.695651, @ms3.geometric_mean
|
130
|
+
# @ms4 contains negative numbers, so we should complain
|
131
|
+
assert_raises RuntimeError do
|
132
|
+
@ms4.geometric_mean
|
133
|
+
end
|
134
|
+
assert_in_delta 3463.229, @ms5.geometric_mean
|
135
|
+
|
136
|
+
@ms1 << 32
|
137
|
+
assert_in_delta 21.63259, @ms1.geometric_mean
|
74
138
|
end
|
75
139
|
|
76
140
|
def test_harm_mean
|
77
|
-
|
141
|
+
assert_in_delta 8.259642, @ms1.harmonic_mean
|
142
|
+
assert_in_delta 58.34724, @ms2.harmonic_mean
|
143
|
+
assert_in_delta 1.218216, @ms3.harmonic_mean
|
144
|
+
assert_in_delta 5.921447, @ms4.harmonic_mean
|
145
|
+
assert_in_delta 976.2331, @ms5.harmonic_mean
|
146
|
+
|
147
|
+
@ms1 << 32
|
148
|
+
assert_in_delta 8.759532, @ms1.harmonic_mean
|
78
149
|
end
|
79
|
-
end
|
80
150
|
|
81
|
-
|
151
|
+
end
|
metadata
CHANGED
@@ -1,121 +1,91 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: ministat
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 1
|
7
|
-
- 2
|
8
|
-
- 6
|
9
|
-
version: 1.2.6
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.3.0
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Dean Hudson
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
21
|
-
name: rubyforge
|
22
|
-
prerelease: false
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
-
none: false
|
25
|
-
requirements:
|
26
|
-
- - ">="
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
segments:
|
29
|
-
- 2
|
30
|
-
- 0
|
31
|
-
- 4
|
32
|
-
version: 2.0.4
|
33
|
-
type: :development
|
34
|
-
version_requirements: *id001
|
35
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2011-10-16 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
36
15
|
name: minitest
|
37
|
-
|
38
|
-
requirement: &id002 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &70294540175900 !ruby/object:Gem::Requirement
|
39
17
|
none: false
|
40
|
-
requirements:
|
18
|
+
requirements:
|
41
19
|
- - ~>
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
|
44
|
-
- 2
|
45
|
-
- 0
|
46
|
-
version: "2.0"
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '2.0'
|
47
22
|
type: :development
|
48
|
-
version_requirements: *id002
|
49
|
-
- !ruby/object:Gem::Dependency
|
50
|
-
name: hoe
|
51
23
|
prerelease: false
|
52
|
-
|
24
|
+
version_requirements: *70294540175900
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: hoe
|
27
|
+
requirement: &70294540175380 !ruby/object:Gem::Requirement
|
53
28
|
none: false
|
54
|
-
requirements:
|
55
|
-
- -
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
|
58
|
-
- 2
|
59
|
-
- 6
|
60
|
-
- 2
|
61
|
-
version: 2.6.2
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '2.12'
|
62
33
|
type: :development
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70294540175380
|
36
|
+
description: ! 'This is a simple package that generates simple statistical info on
|
37
|
+
|
38
|
+
numerical data sets of a single variable. It''s nothing too fancy, but
|
39
|
+
|
67
40
|
maybe just enough to coat your numbers with a thin layer of science. Or,
|
68
|
-
|
69
|
-
|
41
|
+
|
42
|
+
at least to get you thinking about what it may take to do so.'
|
43
|
+
email:
|
70
44
|
- dean@ero.com
|
71
|
-
executables:
|
45
|
+
executables:
|
72
46
|
- ministat
|
73
47
|
extensions: []
|
74
|
-
|
75
|
-
extra_rdoc_files:
|
48
|
+
extra_rdoc_files:
|
76
49
|
- Manifest.txt
|
77
|
-
files:
|
50
|
+
files:
|
78
51
|
- History.rdoc
|
79
52
|
- Manifest.txt
|
80
53
|
- README.rdoc
|
81
54
|
- Rakefile
|
82
55
|
- bin/ministat
|
83
56
|
- lib/ministat.rb
|
57
|
+
- lib/ministat/data.rb
|
84
58
|
- test/data/1.dat
|
85
59
|
- test/data/2.dat
|
60
|
+
- test/data/3.dat
|
86
61
|
- test/test_ministat.rb
|
87
|
-
|
62
|
+
- .gemtest
|
88
63
|
homepage: http://github.com/deanh/MiniStat
|
89
64
|
licenses: []
|
90
|
-
|
91
65
|
post_install_message:
|
92
|
-
rdoc_options:
|
66
|
+
rdoc_options:
|
93
67
|
- --main
|
94
68
|
- README.rdoc
|
95
|
-
require_paths:
|
69
|
+
require_paths:
|
96
70
|
- lib
|
97
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
98
72
|
none: false
|
99
|
-
requirements:
|
100
|
-
- -
|
101
|
-
- !ruby/object:Gem::Version
|
102
|
-
|
103
|
-
|
104
|
-
version: "0"
|
105
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
106
78
|
none: false
|
107
|
-
requirements:
|
108
|
-
- -
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
|
111
|
-
- 0
|
112
|
-
version: "0"
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
113
83
|
requirements: []
|
114
|
-
|
115
84
|
rubyforge_project: ministat
|
116
|
-
rubygems_version: 1.
|
85
|
+
rubygems_version: 1.8.10
|
117
86
|
signing_key:
|
118
87
|
specification_version: 3
|
119
|
-
summary: This is a simple package that generates simple statistical info on numerical
|
120
|
-
|
88
|
+
summary: This is a simple package that generates simple statistical info on numerical
|
89
|
+
data sets of a single variable
|
90
|
+
test_files:
|
121
91
|
- test/test_ministat.rb
|