more_math 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGES +2 -0
- data/LICENSE +18 -0
- data/README +20 -0
- data/Rakefile +84 -0
- data/VERSION +1 -0
- data/install.rb +19 -0
- data/lib/more_math/cantor_pairing_funtion.rb +49 -0
- data/lib/more_math/constants/functions_constants.rb +29 -0
- data/lib/more_math/continued_fraction.rb +140 -0
- data/lib/more_math/distributions.rb +134 -0
- data/lib/more_math/exceptions.rb +6 -0
- data/lib/more_math/functions.rb +151 -0
- data/lib/more_math/histogram.rb +62 -0
- data/lib/more_math/linear_regression.rb +78 -0
- data/lib/more_math/newton_bisection.rb +66 -0
- data/lib/more_math/sequence.rb +337 -0
- data/lib/more_math/version.rb +8 -0
- data/lib/more_math.rb +9 -0
- data/make_doc.rb +5 -0
- data/tests/test_analysis.rb +321 -0
- data/tests/test_cantor_pairing_function.rb +23 -0
- data/tests/test_continued_fraction.rb +40 -0
- data/tests/test_distribution.rb +69 -0
- data/tests/test_functions.rb +33 -0
- data/tests/test_histogram.rb +29 -0
- data/tests/test_newton_bisection.rb +28 -0
- metadata +108 -0
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'more_math'
|
2
|
+
|
3
|
+
module MoreMath
|
4
|
+
# A histogram gives an overview of a sequence's elements.
|
5
|
+
class Histogram
  # Create a Histogram for the elements of +sequence+ with +bins+ bins.
  # +sequence+ has to respond to +empty?+, +min+, +max+, and +each+. The
  # histogram is computed once, here, and kept for later calls.
  def initialize(sequence, bins = 10)
    @sequence = sequence
    @bins = bins
    @result = compute
  end

  # Number of bins for this Histogram.
  attr_reader :bins

  # Return the computed histogram as an array of [ l, c, r ] arrays, where
  # +l+ is the left bin edge, +c+ the number of elements counted for the bin,
  # and +r+ the right bin edge. The array is empty for an empty sequence.
  def to_a
    @result
  end

  # Display this histogram to +output+ (anything responding to <<), one line
  # per bin, starting with the highest bin. Each line shows the bin's middle
  # value followed by a bar of "*" characters. +width+ is the parameter for
  # +prepare_display+, that is the length of the longest bar. Returns self.
  def display(output = $stdout, width = 50)
    prepare_display(width).each do |left, bar, right|
      output << "%11.5f -|%s\n" % [ (left + right) / 2.0, "*" * bar ]
    end
    self
  end

  private

  # Returns an array of tuples (l, c, r) where +l+ is the left bin edge, +c+
  # the +width+-normalized frequency count value, and +r+ the right bin
  # edge. +width+ is usually an integer number representing the width of a
  # histogram bar. The tuples are returned in reverse bin order. An empty
  # histogram yields an empty array.
  def prepare_display(width)
    # Without this guard the maximum below would be taken on nil.
    return [] if @result.empty?
    rows = @result.reverse
    factor = width.to_f / rows.map { |row| row[1] }.max
    rows.map { |left, count, right| [ left, (count * factor).round, right ] }
  end

  # Computes the histogram and returns it as an array of tuples (l, c, r).
  # The range min..max of the sequence is split into +bins+ bins of equal
  # width. An element is counted for the first bin whose right edge is
  # greater than or equal to it; the last bin takes every element above the
  # previous bin's right edge.
  def compute
    return [] if @sequence.empty?
    min = @sequence.min
    max = @sequence.max
    step = (max - min) / bins.to_f
    previous_right = -1 / 0.0 # -Infinity: the first bin has no lower bound
    Array.new(bins) do |i|
      left = min + i * step
      right = min + (i + 1) * step
      last_bin = i == bins - 1
      count = 0
      @sequence.each do |x|
        count += 1 if x > previous_right && (x <= right || last_bin)
      end
      previous_right = right
      [ left, count, right ]
    end
  end
end
|
62
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'more_math'
|
2
|
+
|
3
|
+
module MoreMath
|
4
|
+
# This class computes a linear regression for the given image and domain data
|
5
|
+
# sets.
|
6
|
+
class LinearRegression
  # Computes the least squares line a * x + b for the points built by
  # pairing the x values in +domain+ with the y values in +image+. +domain+
  # defaults to the indices 0, 1, ..., image.size - 1. Raises an
  # ArgumentError if +image+ and +domain+ differ in size.
  def initialize(image, domain = (0...image.size).to_a)
    unless image.size == domain.size
      raise ArgumentError, "image and domain have unequal sizes"
    end
    @image, @domain = image, domain
    compute
  end

  # The image data as an array.
  attr_reader :image

  # The domain data as an array.
  attr_reader :domain

  # The slope of the line.
  attr_reader :a

  # The offset of the line.
  attr_reader :b

  # Return true if the slope of the underlying data (not the sample data
  # passed into the constructor of this LinearRegression instance) is likely
  # (with alpha level _alpha_) to be zero. With fewer than three data points
  # there are not enough values to check, and true is returned.
  def slope_zero?(alpha = 0.05)
    df = @image.size - 2
    return true if df <= 0 # not enough values to check
    critical = TDistribution.new(df).inverse_probability(1 - alpha.abs / 2.0).abs
    tvalue(alpha).abs <= critical
  end

  # Returns the residues of this linear regression in relation to the given
  # domain and image, that is y - (a * x + b) for every point (x, y).
  def residues
    @domain.zip(@image).map { |x, y| y - (@a * x + @b) }
  end

  private

  # Computes slope @a and offset @b of the least squares line through the
  # points (x, y) taken from @domain and @image. Returns self.
  def compute
    size = @image.size
    sum_xx = sum_xy = sum_x = sum_y = 0.0
    # The x values are used as given, so that @b is the offset for the same
    # x values that residues and tvalue evaluate the line at.
    @domain.zip(@image) do |x, y|
      sum_xx += x ** 2
      sum_xy += x * y
      sum_x += x
      sum_y += y
    end
    @a = (size * sum_xy - sum_x * sum_y) / (size * sum_xx - sum_x ** 2)
    @b = (sum_y - @a * sum_x) / size
    self
  end

  # Returns the t value of the slope: the slope divided by its standard
  # error sqrt(sse_y / df) / sqrt(sse_x), where sse_y is the sum of squared
  # residues and sse_x the sum of squared deviations of the x values from
  # their mean. Returns 0.0 if there are fewer than three data points or the
  # result is not a number. +alpha+ is accepted but not used here.
  def tvalue(alpha = 0.05)
    df = @image.size - 2
    return 0.0 if df <= 0
    sse_y = @domain.zip(@image).inject(0.0) do |s, (x, y)|
      s + (y - (a * x + b)) ** 2
    end
    # The deviations of x have to be taken around the mean of the x values.
    mean_x = @domain.inject(0.0) { |s, x| s + x } / @domain.size
    sse_x = @domain.inject(0.0) { |s, x| s + (x - mean_x) ** 2 }
    t = a / (Math.sqrt(sse_y / df) / Math.sqrt(sse_x))
    t.nan? ? 0.0 : t
  end
end
|
78
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'more_math/exceptions'
|
2
|
+
require 'more_math'
|
3
|
+
|
4
|
+
module MoreMath
|
5
|
+
# This class is used to find the root of a function with Newton's bisection
|
6
|
+
# method.
|
7
|
+
class NewtonBisection
  include MoreMath::Exceptions

  # Creates a NewtonBisection instance for +function+, a one-argument block.
  def initialize(&function)
    @function = function
  end

  # The function, passed into the constructor.
  attr_reader :function

  # Return a bracket around a root, starting from the initial +range+. The
  # bracket is a Range x1..x2 at whose ends the function has opposite signs.
  # As long as this isn't the case, the end with the smaller absolute
  # function value is moved outwards by +factor+ times the current width.
  # The method returns nil, if no such bracket around a root could be found
  # after +n+ tries with the scaling +factor+. An ArgumentError is raised if
  # the first value of +range+ isn't less than its last value.
  def bracket(range = -1..1, n = 50, factor = 1.6)
    x1, x2 = range.first.to_f, range.last.to_f
    raise ArgumentError, "bad initial range #{range}" if x1 >= x2
    f1, f2 = @function[x1], @function[x2]
    n.times do
      return x1..x2 if f1 * f2 < 0
      if f1.abs < f2.abs
        f1 = @function[x1 += factor * (x1 - x2)]
      else
        f2 = @function[x2 += factor * (x2 - x1)]
      end
    end
    # The last expansion has already been evaluated, so check it as well.
    return x1..x2 if f1 * f2 < 0
    nil
  end

  # Find the root of function in +range+ and return it. If no +range+ is
  # given, a bracket is determined by calling +bracket+ with its default
  # arguments. The method raises a DivergentException, if no bracket could be
  # determined, if the function values at the ends of the range don't have
  # opposite signs, or if no root could be found after +n+ tries and in the
  # +epsilon+ environment. An ArgumentError is raised if the first value of
  # +range+ isn't less than its last value.
  def solve(range = nil, n = 1 << 16, epsilon = 1E-16)
    if range
      x1, x2 = range.first.to_f, range.last.to_f
      raise ArgumentError, "bad initial range #{range}" if x1 >= x2
    else
      range = bracket
      range or raise DivergentException, "bracket could not be determined"
      x1, x2 = range.first, range.last
    end
    f = @function[x1]
    fmid = @function[x2]
    if f * fmid >= 0
      raise DivergentException, "root must be bracketed in #{range}"
    end
    # root starts at the end where the function is negative, dx points from
    # there to the other end.
    if f < 0
      root, dx = x1, x2 - x1
    else
      root, dx = x2, x1 - x2
    end
    n.times do
      dx *= 0.5
      xmid = root + dx
      fmid = @function[xmid]
      # <= (not <), so that a midpoint that is exactly a root is the value
      # returned below, instead of the old end of the interval.
      root = xmid if fmid <= 0
      return root if dx.abs < epsilon || fmid == 0
    end
    raise DivergentException, "too many iterations (#{n})"
  end
end
|
66
|
+
end
|
@@ -0,0 +1,337 @@
|
|
1
|
+
require 'more_math'
|
2
|
+
|
3
|
+
module MoreMath
|
4
|
+
# This class is used to contain elements and compute various statistical
|
5
|
+
# values for them.
|
6
|
+
class Sequence
  include Enumerable

  # Creates a Sequence for the array +elements+. Note that +elements+ itself
  # is stored and frozen (not a copy of it); the values computed by the
  # methods below are cached on first use.
  def initialize(elements)
    @elements = elements
    @elements.freeze
  end

  # Returns the array of elements.
  attr_reader :elements

  # Calls the +block+ for every element of this Sequence.
  def each(&block)
    @elements.each(&block)
  end

  # Returns true if this sequence is empty, otherwise false.
  def empty?
    @elements.empty?
  end

  # Returns the number of elements, on which the analysis is based.
  def size
    @elements.size
  end

  # Returns the variance of the elements (sum of squares divided by size).
  def variance
    @variance ||= sum_of_squares / size
  end

  # Returns the sample_variance of the elements (sum of squares divided by
  # size - 1), or 0.0 if there are fewer than two elements.
  def sample_variance
    @sample_variance ||= size > 1 ? sum_of_squares / (size - 1.0) : 0.0
  end

  # Returns the sum of squares (the sum of the squared deviations from the
  # arithmetic mean) of the elements.
  def sum_of_squares
    @sum_of_squares ||= begin
      mean = arithmetic_mean
      @elements.inject(0.0) { |s, t| s + (t - mean) ** 2 }
    end
  end

  # Returns the standard deviation of the elements.
  def standard_deviation
    @standard_deviation ||= Math.sqrt(variance)
  end

  # Returns the standard deviation of the elements in percentage of the
  # arithmetic mean.
  def standard_deviation_percentage
    @standard_deviation_percentage ||= 100.0 * standard_deviation / arithmetic_mean
  end

  # Returns the sample standard deviation of the elements.
  def sample_standard_deviation
    @sample_standard_deviation ||= Math.sqrt(sample_variance)
  end

  # Returns the sample standard deviation of the elements in percentage
  # of the arithmetic mean.
  def sample_standard_deviation_percentage
    @sample_standard_deviation_percentage ||= 100.0 * sample_standard_deviation / arithmetic_mean
  end

  # Returns the sum of all elements as a float.
  def sum
    @sum ||= @elements.inject(0.0) { |s, t| s + t }
  end

  # Returns the arithmetic mean of the elements.
  def arithmetic_mean
    @arithmetic_mean ||= sum / size
  end

  alias mean arithmetic_mean

  # Returns the harmonic mean of the elements. If any of the elements
  # is less than or equal to 0.0, this method returns NaN.
  def harmonic_mean
    @harmonic_mean ||= begin
      # nil signals that an element <= 0 was found.
      reciprocal_sum = @elements.inject(0.0) do |s, t|
        break nil unless t > 0
        s + 1.0 / t
      end
      reciprocal_sum ? size / reciprocal_sum : 0 / 0.0
    end
  end

  # Returns the geometric mean of the elements. If an element equal to 0 is
  # met first, this method returns 0.0; if an element less than 0.0 is met
  # first, it returns NaN.
  def geometric_mean
    @geometric_mean ||= begin
      # :null signals that a zero was found, nil a negative element.
      log_sum = @elements.inject(0.0) do |s, t|
        if t > 0
          s + Math.log(t)
        elsif t == 0
          break :null
        else
          break nil
        end
      end
      case log_sum
      when :null then 0.0
      when Float then Math.exp(log_sum / size)
      else 0 / 0.0
      end
    end
  end

  # Returns the minimum of the elements.
  def min
    @min ||= @elements.min
  end

  # Returns the maximum of the elements.
  def max
    @max ||= @elements.max
  end

  # Returns the +p+-percentile of the elements.
  # There are many methods to compute the percentile, this method uses the
  # weighted average at x_(n + 1)p, which allows p to be in 0...100
  # (excluding the 100). An ArgumentError is raised for any other +p+. For an
  # empty sequence nil is returned.
  def percentile(p = 50)
    (0...100).include?(p) or
      raise ArgumentError, "p = #{p}, but has to be in (0...100)"
    p /= 100.0
    @sorted ||= @elements.sort
    r = p * (@sorted.size + 1)
    r_i = r.to_i
    r_f = r - r_i
    return @sorted[0] if r_i < 1
    result = @sorted[r_i - 1]
    if r_i < @sorted.size
      # Interpolate between the two neighbouring elements.
      result += r_f * (@sorted[r_i] - @sorted[r_i - 1])
    end
    result
  end

  alias median percentile

  # Use an approximation of the Welch-Satterthwaite equation to compute the
  # degrees of freedom for Welch's t-test.
  def compute_welch_df(other)
    (sample_variance / size + other.sample_variance / other.size) ** 2 / (
      (sample_variance ** 2 / (size ** 2 * (size - 1))) +
      (other.sample_variance ** 2 / (other.size ** 2 * (other.size - 1))))
  end

  # Returns the t value of the Welch's t-test between this Sequence
  # instance and the +other+. Returns 0.0 if the square root involved raises
  # a domain error.
  def t_welch(other)
    signal = arithmetic_mean - other.arithmetic_mean
    noise = Math.sqrt(sample_variance / size +
      other.sample_variance / other.size)
    signal / noise
  rescue Errno::EDOM, Math::DomainError
    0.0
  end

  # Returns an estimation of the common standard deviation of the
  # elements of this and +other+.
  def common_standard_deviation(other)
    Math.sqrt(common_variance(other))
  end

  # Returns an estimation of the common variance of the elements of this
  # and +other+: both sample variances, weighted with their degrees of
  # freedom, divided by the sum of the degrees of freedom.
  def common_variance(other)
    # The parentheses matter: the whole weighted sum has to be divided.
    ((size - 1) * sample_variance + (other.size - 1) * other.sample_variance) /
      (size + other.size - 2)
  end

  # Compute the # degrees of freedom for Student's t-test.
  def compute_student_df(other)
    size + other.size - 2
  end

  # Returns the t value of the Student's t-test between this Sequence
  # instance and the +other+: the difference of the arithmetic means divided
  # by the common standard deviation scaled with sqrt(1/n1 + 1/n2). Returns
  # 0.0 if a square root involved raises a domain error.
  def t_student(other)
    signal = arithmetic_mean - other.arithmetic_mean
    noise = common_standard_deviation(other) *
      Math.sqrt(1.0 / size + 1.0 / other.size)
    signal / noise
  rescue Errno::EDOM, Math::DomainError
    0.0
  end

  # Compute a sample size, that will more likely yield a mean difference
  # between this instance's elements and those of +other+. Use +alpha+
  # and +beta+ as levels for the first- and second-order errors.
  def suggested_sample_size(other, alpha = 0.05, beta = 0.05)
    alpha, beta = alpha.abs, beta.abs
    signal = arithmetic_mean - other.arithmetic_mean
    df = size + other.size - 2
    pooled_variance_estimate = (sum_of_squares + other.sum_of_squares) / df
    td = TDistribution.new df
    (((td.inverse_probability(alpha) + td.inverse_probability(beta)) *
      Math.sqrt(pooled_variance_estimate)) / signal) ** 2
  end

  # Return true, if the Sequence instance covers the +other+, that is their
  # arithmetic mean value is most likely to be equal for the +alpha+ error
  # level.
  def cover?(other, alpha = 0.05)
    t = t_welch(other)
    td = TDistribution.new(compute_welch_df(other))
    t.abs < td.inverse_probability(1 - alpha.abs / 2.0)
  end

  # Return the confidence interval for the arithmetic mean with alpha level
  # +alpha+ of the elements of this Sequence instance as a Range object.
  def confidence_interval(alpha = 0.05)
    td = TDistribution.new(size - 1)
    t = td.inverse_probability(alpha / 2.0).abs
    delta = t * sample_standard_deviation / Math.sqrt(size)
    (arithmetic_mean - delta)..(arithmetic_mean + delta)
  end

  # Returns the array of autovariances (of length size - 1). The array is
  # empty if there are fewer than two elements.
  def autovariance
    return [] if size < 2
    mean = arithmetic_mean
    Array.new(size - 1) do |k|
      s = 0.0
      0.upto(size - k - 1) do |i|
        s += (@elements[i] - mean) * (@elements[i + k] - mean)
      end
      s / size
    end
  end

  # Returns the array of autocorrelation values c_k / c_0 (of length size -
  # 1).
  def autocorrelation
    c = autovariance
    Array.new(c.size) { |k| c[k] / c[0] }
  end

  # Returns the d-value for the Durbin-Watson statistic. The value is d << 2
  # for positive, d >> 2 for negative and d around 2 for no autocorrelation.
  # If there is at most one residue, 2.0 is returned.
  def durbin_watson_statistic
    e = linear_regression.residues
    return 2.0 if e.size <= 1
    (1...e.size).inject(0.0) { |s, i| s + (e[i] - e[i - 1]) ** 2 } /
      e.inject(0.0) { |s, x| s + x ** 2 }
  end

  # Returns the q value of the Ljung-Box statistic for the number of lags
  # +lags+. A higher value might indicate autocorrelation in the elements of
  # this Sequence instance. This method returns nil if there weren't enough
  # (at least lags) lags available.
  def ljung_box_statistic(lags = 20)
    r = autocorrelation
    return if lags >= r.size
    n = size
    n * (n + 2) * (1..lags).inject(0.0) { |s, i| s + r[i] ** 2 / (n - i) }
  end

  # This method tries to detect autocorrelation with the Ljung-Box
  # statistic. If enough lags can be considered it returns a hash with
  # results, otherwise nil is returned. The keys are
  # :lags:: the number of lags,
  # :alpha_level:: the alpha level for the test,
  # :q:: the value of the ljung_box_statistic,
  # :p:: the value of the chi-square distribution (with +lags+ degrees of
  #      freedom) computed for q,
  # :detected:: true if a correlation was found, that is if
  #             p >= 1 - alpha_level.
  def detect_autocorrelation(lags = 20, alpha_level = 0.05)
    q = ljung_box_statistic(lags) or return
    p = ChiSquareDistribution.new(lags).probability(q)
    {
      :lags => lags,
      :alpha_level => alpha_level,
      :q => q,
      :p => p,
      :detected => p >= 1 - alpha_level,
    }
  end

  # Return a result hash with the number of :very_low, :low, :high, and
  # :very_high outliers, determined by the box plotting algorithm, together
  # with the :median, :iqr, and :factor values used. With iqr being the
  # distance between the first and third quartile, elements at least
  # +factor+ * iqr below the first/above the third quartile are counted as
  # :very_low/:very_high, elements at least +factor+ / 2 * iqr away as
  # :low/:high. Categories without outliers aren't stored, but read as 0. If
  # no outliers were found, the sequence is empty, or the iqr is less than
  # epsilon, nil is returned.
  def detect_outliers(factor = 3.0, epsilon = 1E-5)
    return if empty?
    half_factor = factor / 2.0
    quartile1 = percentile(25)
    quartile3 = percentile(75)
    iqr = quartile3 - quartile1
    return if iqr < epsilon
    very_low_fence = quartile1 - factor * iqr
    low_fence = quartile1 - half_factor * iqr
    # The upper fences are measured from the third quartile, mirroring the
    # lower ones.
    high_fence = quartile3 + half_factor * iqr
    very_high_fence = quartile3 + factor * iqr
    result = Hash.new(0)
    @elements.each do |t|
      extreme =
        if t <= very_low_fence
          :very_low
        elsif t <= low_fence
          :low
        elsif t > very_high_fence
          :very_high
        elsif t >= high_fence
          :high
        end
      result[extreme] += 1 if extreme
    end
    return if result.empty?
    result[:median] = median
    result[:iqr] = iqr
    result[:factor] = factor
    result
  end

  # Returns the LinearRegression object for the equation a * x + b which
  # represents the line computed by the linear regression algorithm.
  def linear_regression
    @linear_regression ||= LinearRegression.new @elements
  end

  # Returns a Histogram instance with +bins+ as the number of bins for this
  # analysis' elements.
  def histogram(bins)
    Histogram.new(self, bins)
  end
end
|
337
|
+
end
|
data/lib/more_math.rb
ADDED