more_math 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +2 -0
- data/LICENSE +18 -0
- data/README +20 -0
- data/Rakefile +84 -0
- data/VERSION +1 -0
- data/install.rb +19 -0
- data/lib/more_math/cantor_pairing_funtion.rb +49 -0
- data/lib/more_math/constants/functions_constants.rb +29 -0
- data/lib/more_math/continued_fraction.rb +140 -0
- data/lib/more_math/distributions.rb +134 -0
- data/lib/more_math/exceptions.rb +6 -0
- data/lib/more_math/functions.rb +151 -0
- data/lib/more_math/histogram.rb +62 -0
- data/lib/more_math/linear_regression.rb +78 -0
- data/lib/more_math/newton_bisection.rb +66 -0
- data/lib/more_math/sequence.rb +337 -0
- data/lib/more_math/version.rb +8 -0
- data/lib/more_math.rb +9 -0
- data/make_doc.rb +5 -0
- data/tests/test_analysis.rb +321 -0
- data/tests/test_cantor_pairing_function.rb +23 -0
- data/tests/test_continued_fraction.rb +40 -0
- data/tests/test_distribution.rb +69 -0
- data/tests/test_functions.rb +33 -0
- data/tests/test_histogram.rb +29 -0
- data/tests/test_newton_bisection.rb +28 -0
- metadata +108 -0
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'more_math'
|
2
|
+
|
3
|
+
module MoreMath
  # A histogram gives an overview of a sequence's elements by counting how
  # many of them fall into each of +bins+ equally wide intervals between the
  # smallest and the largest element.
  class Histogram
    # Create a Histogram for the elements of +sequence+ with +bins+ bins.
    #
    # +sequence+ has to respond to +empty?+, +min+, +max+ and +each+. The
    # histogram is computed once, here in the constructor.
    def initialize(sequence, bins = 10)
      @sequence = sequence
      @bins     = bins
      @result   = compute
    end

    # Number of bins for this Histogram.
    attr_reader :bins

    # Return the computed histogram as an array of arrays, one
    # <code>[ left_edge, count, right_edge ]</code> triple per bin. The array
    # is empty if the sequence was empty.
    def to_a
      @result
    end

    # Display this histogram to +output+ (anything responding to <<), one
    # line per bin, starting with the highest bin. Each line shows the bin's
    # centre and a bar of asterisks. +width+ is the parameter for
    # +prepare_display+, i. e. the length of the longest bar.
    #
    # Returns self. Nothing is written for an empty histogram.
    def display(output = $stdout, width = 50)
      prepare_display(width).each do |left, bar, right|
        output << "%11.5f -|%s\n" % [ (left + right) / 2.0, "*" * bar ]
      end
      self
    end

    private

    # Returns an array of tuples (l, c, r) where +l+ is the left bin edge, +c+
    # the +width+-normalized frequency count value, and +r+ the right bin
    # edge. +width+ is usually an integer number representing the width of a
    # histogram bar. The tuples are returned in reversed bin order.
    def prepare_display(width)
      # An empty histogram has no counts to normalize against.
      @result.empty? and return []
      rows   = @result.reverse
      peak   = rows.transpose[1].max
      # Guard against a division by zero if no element was counted at all.
      factor = peak.zero? ? 0.0 : width.to_f / peak
      rows.map { |left, count, right| [ left, (count * factor).round, right ] }
    end

    # Computes the histogram and returns it as an array of tuples (l, c, r).
    #
    # A bin covers the half-open interval (l, r]. The first bin has no lower
    # bound and the last bin no upper bound, so that the minimum and any
    # value pushed past the last edge by rounding errors are still counted.
    def compute
      @sequence.empty? and return []
      min  = @sequence.min
      max  = @sequence.max
      step = (max - min) / bins.to_f
      last = bins - 1
      Array.new(bins) do |i|
        left  = min + i * step
        right = min + (i + 1) * step
        count = 0
        @sequence.each do |x|
          # +left+ is exactly the previous bin's right edge (same float
          # expression), so every element lands in exactly one bin.
          (i == 0 || x > left) and (i == last || x <= right) and count += 1
        end
        [ left, count, right ]
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'more_math'
|
2
|
+
|
3
|
+
module MoreMath
  # This class computes a linear regression for the given image and domain data
  # sets, i. e. it fits the line y = a * x + b through the points
  # (domain[i], image[i]) with the method of least squares.
  class LinearRegression
    # Computes the regression for the y values +image+ and the x values
    # +domain+. If +domain+ is omitted, the indices 0, 1, ..., image.size - 1
    # are used as x values.
    #
    # Raises an ArgumentError if +image+ and +domain+ differ in size.
    def initialize(image, domain = (0...image.size).to_a)
      image.size != domain.size and raise ArgumentError,
        "image and domain have unequal sizes"
      @image, @domain = image, domain
      compute
    end

    # The image data as an array.
    attr_reader :image

    # The domain data as an array.
    attr_reader :domain

    # The slope of the line.
    attr_reader :a

    # The offset of the line.
    attr_reader :b

    # Return true if the slope of the underlying data (not the sample data
    # passed into the constructor of this LinearRegression instance) is likely
    # (with alpha level _alpha_) to be zero.
    #
    # With fewer than three data points there are no degrees of freedom left
    # to test with, and true is returned.
    def slope_zero?(alpha = 0.05)
      df = @image.size - 2
      return true if df <= 0 # not enough values to check
      t = tvalue(alpha)
      td = TDistribution.new df
      t.abs <= td.inverse_probability(1 - alpha.abs / 2.0).abs
    end

    # Returns the residues of this linear regression in relation to the given
    # domain and image, that is y - (a * x + b) for every data point.
    def residues
      @domain.zip(@image).map { |x, y| y - (@a * x + @b) }
    end

    private

    # Computes slope @a and offset @b from the least squares normal equations
    # and returns self.
    #
    # The sums are taken over the domain values exactly as given, so that @b
    # is the offset for the same x values that #residues and #tvalue use.
    def compute
      size = @image.size
      sum_xx = sum_xy = sum_x = sum_y = 0.0
      @domain.zip(@image) do |x, y|
        sum_xx += x ** 2
        sum_xy += x * y
        sum_x  += x
        sum_y  += y
      end
      @a = (size * sum_xy - sum_x * sum_y) / (size * sum_xx - sum_x ** 2)
      @b = (sum_y - @a * sum_x) / size
      self
    end

    # Returns the t statistic of the slope: the slope divided by its standard
    # error sqrt(sse_y / df) / sqrt(sse_x), where sse_y is the sum of the
    # squared residues and sse_x the sum of the squared deviations of the
    # domain values from their own mean.
    #
    # Returns 0.0 if there are fewer than three data points or if the result
    # is not a number. +alpha+ is accepted for interface compatibility and is
    # not used in the computation.
    def tvalue(alpha = 0.05)
      df = @image.size - 2
      return 0.0 if df <= 0
      sse_y = 0.0
      @domain.zip(@image) do |x, y|
        f_x = a * x + b
        sse_y += (y - f_x) ** 2
      end
      mean_x = @domain.inject(0.0) { |s, x| s + x } / @domain.size
      sse_x = @domain.inject(0.0) { |s, x| s + (x - mean_x) ** 2 }
      t = a / (Math.sqrt(sse_y / df) / Math.sqrt(sse_x))
      t.nan? ? 0.0 : t
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'more_math/exceptions'
|
2
|
+
require 'more_math'
|
3
|
+
|
4
|
+
module MoreMath
  # This class is used to find the root of a function with Newton's bisection
  # method.
  class NewtonBisection
    include MoreMath::Exceptions

    # Creates a NewtonBisection instance for +function+, a one-argument block.
    def initialize(&function)
      @function = function
    end

    # The function, passed into the constructor.
    attr_reader :function

    # Return a bracket around a root, starting from the initial +range+. The
    # method returns nil, if no such bracket around a root could be found after
    # +n+ tries with the scaling +factor+.
    #
    # In every try the end of the interval at which the function value is
    # closer to zero is moved outwards by +factor+ times the current interval
    # width. An ArgumentError is raised if +range+ is empty or reversed.
    def bracket(range = -1..1, n = 50, factor = 1.6)
      x1, x2 = range.first.to_f, range.last.to_f
      x1 >= x2 and raise ArgumentError, "bad initial range #{range}"
      f1, f2 = @function[x1], @function[x2]
      n.times do
        f1 * f2 < 0 and return x1..x2
        if f1.abs < f2.abs
          f1 = @function[x1 += factor * (x1 - x2)]
        else
          f2 = @function[x2 += factor * (x2 - x1)]
        end
      end
      # The interval produced by the last expansion hasn't been checked yet.
      f1 * f2 < 0 ? x1..x2 : nil
    end

    # Find the root of function in +range+ and return it. The method raises a
    # DivergentException, if no such root could be found after +n+ tries and in
    # the +epsilon+ environment.
    #
    # If +range+ is nil, #bracket is used to determine a starting interval. A
    # DivergentException is also raised if no bracket could be determined or
    # if the function values at both ends of the interval don't differ in
    # sign. An ArgumentError is raised for an empty or reversed +range+.
    def solve(range = nil, n = 1 << 16, epsilon = 1E-16)
      if range
        x1, x2 = range.first.to_f, range.last.to_f
        x1 >= x2 and raise ArgumentError, "bad initial range #{range}"
      elsif range = bracket
        x1, x2 = range.first, range.last
      else
        raise DivergentException, "bracket could not be determined"
      end
      f = @function[x1]
      fmid = @function[x2]
      f * fmid >= 0 and raise DivergentException, "root must be bracketed in #{range}"
      # Orient the search so that the function is negative at +root+ and
      # positive at root + dx.
      root = if f < 0
               dx = x2 - x1
               x1
             else
               dx = x1 - x2
               x2
             end
      n.times do
        fmid = @function[xmid = root + (dx *= 0.5)]
        # <= (not <) so that an exact hit at the midpoint becomes the result
        # instead of the previous, worse approximation.
        fmid <= 0 and root = xmid
        (dx.abs < epsilon || fmid == 0) and return root
      end
      raise DivergentException, "too many iterations (#{n})"
    end
  end
end
|
@@ -0,0 +1,337 @@
|
|
1
|
+
require 'more_math'
|
2
|
+
|
3
|
+
module MoreMath
  # This class is used to contain elements and compute various statistical
  # values for them. Most values are computed lazily and memoized.
  class Sequence
    # Creates a Sequence for +elements+. The passed object itself (not a
    # copy) is frozen, because the memoized statistics would become stale if
    # the elements changed afterwards.
    def initialize(elements)
      @elements = elements
      @elements.freeze
    end

    # Returns the array of elements.
    attr_reader :elements

    # Calls the +block+ for every element of this Sequence.
    def each(&block)
      @elements.each(&block)
    end
    include Enumerable

    # Returns true if this sequence is empty, otherwise false.
    def empty?
      @elements.empty?
    end

    # Returns the number of elements, on which the analysis is based.
    def size
      @elements.size
    end

    # Returns the variance of the elements (sum of squares divided by size).
    def variance
      @variance ||= sum_of_squares / size
    end

    # Returns the sample_variance of the elements (sum of squares divided by
    # size - 1), or 0.0 if there are fewer than two elements.
    def sample_variance
      @sample_variance ||= size > 1 ? sum_of_squares / (size - 1.0) : 0.0
    end

    # Returns the sum of squares (the sum of the squared deviations) of the
    # elements.
    def sum_of_squares
      @sum_of_squares ||= @elements.inject(0.0) { |s, t| s + (t - arithmetic_mean) ** 2 }
    end

    # Returns the standard deviation of the elements.
    def standard_deviation
      @standard_deviation ||= Math.sqrt(variance)
    end

    # Returns the standard deviation of the elements in percentage of the
    # arithmetic mean.
    def standard_deviation_percentage
      @standard_deviation_percentage ||= 100.0 * standard_deviation / arithmetic_mean
    end

    # Returns the sample standard deviation of the elements.
    def sample_standard_deviation
      @sample_standard_deviation ||= Math.sqrt(sample_variance)
    end

    # Returns the sample standard deviation of the elements in percentage
    # of the arithmetic mean.
    def sample_standard_deviation_percentage
      @sample_standard_deviation_percentage ||= 100.0 * sample_standard_deviation / arithmetic_mean
    end

    # Returns the sum of all elements (as a Float).
    def sum
      @sum ||= @elements.inject(0.0) { |s, t| s + t }
    end

    # Returns the arithmetic mean of the elements.
    def arithmetic_mean
      @arithmetic_mean ||= sum / size
    end

    alias mean arithmetic_mean

    # Returns the harmonic mean of the elements. If any of the elements
    # is less than or equal to 0.0, this method returns NaN.
    def harmonic_mean
      @harmonic_mean ||= (
        sum = @elements.inject(0.0) { |s, t|
          if t > 0
            s + 1.0 / t
          else
            break nil # leaves inject with nil as its result
          end
        }
        sum ? size / sum : 0 / 0.0
      )
    end

    # Returns the geometric mean of the elements. If any of the
    # elements is less than 0.0, this method returns NaN. If an element is
    # 0.0 (and no negative element was seen before it), 0.0 is returned.
    def geometric_mean
      @geometric_mean ||= (
        sum = @elements.inject(0.0) { |s, t|
          case
          when t > 0
            s + Math.log(t)
          when t == 0
            break :null
          else
            break nil
          end
        }
        case sum
        when :null
          0.0
        when Float
          Math.exp(sum / size)
        else
          0 / 0.0
        end
      )
    end

    # Returns the minimum of the elements.
    def min
      @min ||= @elements.min
    end

    # Returns the maximum of the elements.
    def max
      @max ||= @elements.max
    end

    # Returns the +p+-percentile of the elements.
    # There are many methods to compute the percentile, this method uses
    # the weighted average at x_(n + 1)p, which allows p to be in 0...100
    # (excluding the 100).
    #
    # Raises an ArgumentError if +p+ is outside of 0...100.
    def percentile(p = 50)
      (0...100).include?(p) or
        raise ArgumentError, "p = #{p}, but has to be in (0...100)"
      p /= 100.0
      @sorted ||= @elements.sort
      r = p * (@sorted.size + 1)
      r_i = r.to_i
      r_f = r - r_i
      if r_i >= 1
        result = @sorted[r_i - 1]
        if r_i < @sorted.size
          # interpolate between the two neighbouring elements
          result += r_f * (@sorted[r_i] - @sorted[r_i - 1])
        end
      else
        result = @sorted[0]
      end
      result
    end

    alias median percentile

    # Use an approximation of the Welch-Satterthwaite equation to compute the
    # degrees of freedom for Welch's t-test.
    def compute_welch_df(other)
      (sample_variance / size + other.sample_variance / other.size) ** 2 / (
        (sample_variance ** 2 / (size ** 2 * (size - 1))) +
        (other.sample_variance ** 2 / (other.size ** 2 * (other.size - 1))))
    end

    # Returns the t value of the Welch's t-test between this Sequence
    # instance and the +other+.
    def t_welch(other)
      signal = arithmetic_mean - other.arithmetic_mean
      noise = Math.sqrt(sample_variance / size +
        other.sample_variance / other.size)
      signal / noise
    rescue Errno::EDOM
      0.0
    end

    # Returns an estimation of the common standard deviation of the
    # elements of this and +other+.
    def common_standard_deviation(other)
      Math.sqrt(common_variance(other))
    end

    # Returns an estimation of the common variance of the elements of this
    # and +other+: both sample variances weighted by their degrees of freedom,
    # divided by the total degrees of freedom.
    def common_variance(other)
      ((size - 1) * sample_variance + (other.size - 1) * other.sample_variance) /
        (size + other.size - 2)
    end

    # Compute the # degrees of freedom for Student's t-test.
    def compute_student_df(other)
      size + other.size - 2
    end

    # Returns the t value of the Student's t-test between this Sequence
    # instance and the +other+: the difference of the means divided by the
    # common standard deviation times sqrt(1 / size + 1 / other.size).
    def t_student(other)
      signal = arithmetic_mean - other.arithmetic_mean
      noise = common_standard_deviation(other) *
        Math.sqrt(1.0 / size + 1.0 / other.size)
      signal / noise
    rescue Errno::EDOM
      0.0
    end

    # Compute a sample size, that will more likely yield a mean difference
    # between this instance's elements and those of +other+. Use +alpha+
    # and +beta+ as levels for the first- and second-order errors.
    def suggested_sample_size(other, alpha = 0.05, beta = 0.05)
      alpha, beta = alpha.abs, beta.abs
      signal = arithmetic_mean - other.arithmetic_mean
      df = size + other.size - 2
      pooled_variance_estimate = (sum_of_squares + other.sum_of_squares) / df
      td = TDistribution.new df
      (((td.inverse_probability(alpha) + td.inverse_probability(beta)) *
        Math.sqrt(pooled_variance_estimate)) / signal) ** 2
    end

    # Return true, if the Sequence instance covers the +other+, that is their
    # arithmetic mean value is most likely to be equal for the +alpha+ error
    # level.
    def cover?(other, alpha = 0.05)
      t = t_welch(other)
      td = TDistribution.new(compute_welch_df(other))
      t.abs < td.inverse_probability(1 - alpha.abs / 2.0)
    end

    # Return the confidence interval for the arithmetic mean with alpha level +alpha+ of
    # the elements of this Sequence instance as a Range object.
    def confidence_interval(alpha = 0.05)
      td = TDistribution.new(size - 1)
      t = td.inverse_probability(alpha / 2).abs
      delta = t * sample_standard_deviation / Math.sqrt(size)
      (arithmetic_mean - delta)..(arithmetic_mean + delta)
    end

    # Returns the array of autovariances (of length size - 1).
    def autovariance
      Array.new(size - 1) do |k|
        s = 0.0
        0.upto(size - k - 1) do |i|
          s += (@elements[i] - arithmetic_mean) * (@elements[i + k] - arithmetic_mean)
        end
        s / size
      end
    end

    # Returns the array of autocorrelation values c_k / c_0 (of length size -
    # 1).
    def autocorrelation
      c = autovariance
      Array.new(c.size) { |k| c[k] / c[0] }
    end

    # Returns the d-value for the Durbin-Watson statistic. The value is d << 2
    # for positive, d >> 2 for negative and d around 2 for no autocorrelation.
    # For fewer than two residues 2.0 is returned.
    def durbin_watson_statistic
      e = linear_regression.residues
      e.size <= 1 and return 2.0
      (1...e.size).inject(0.0) { |s, i| s + (e[i] - e[i - 1]) ** 2 } /
        e.inject(0.0) { |s, x| s + x ** 2 }
    end

    # Returns the q value of the Ljung-Box statistic for the number of lags
    # +lags+. A higher value might indicate autocorrelation in the elements of
    # this Sequence instance. This method returns nil if there weren't enough
    # (at least lags) lags available.
    def ljung_box_statistic(lags = 20)
      r = autocorrelation
      lags >= r.size and return
      n = size
      n * (n + 2) * (1..lags).inject(0.0) { |s, i| s + r[i] ** 2 / (n - i) }
    end

    # This method tries to detect autocorrelation with the Ljung-Box
    # statistic. If enough lags can be considered it returns a hash with
    # results, otherwise nil is returned. The keys are
    # :lags:: the number of lags,
    # :alpha_level:: the alpha level for the test,
    # :q:: the value of the ljung_box_statistic,
    # :p:: the p-value computed, if p is higher than alpha no correlation was detected,
    # :detected:: true if a correlation was found.
    def detect_autocorrelation(lags = 20, alpha_level = 0.05)
      if q = ljung_box_statistic(lags)
        p = ChiSquareDistribution.new(lags).probability(q)
        return {
          :lags         => lags,
          :alpha_level  => alpha_level,
          :q            => q,
          :p            => p,
          :detected     => p >= 1 - alpha_level,
        }
      end
    end

    # Return a result hash with the number of :very_low, :low, :high, and
    # :very_high outliers, determined by the box plotting algorithm run with
    # :median and :iqr parameters. If no outliers were found or the iqr is
    # less than epsilon, nil is returned.
    #
    # An element is :low/:high if it lies at least factor / 2 interquartile
    # ranges below the first/above the third quartile, and :very_low/
    # :very_high if it lies at least +factor+ interquartile ranges away. The
    # hash also contains the keys :median, :iqr and :factor.
    def detect_outliers(factor = 3.0, epsilon = 1E-5)
      half_factor = factor / 2.0
      quartile1 = percentile(25)
      quartile3 = percentile(75)
      iqr = quartile3 - quartile1
      iqr < epsilon and return
      low_outer  = quartile1 - factor * iqr
      low_inner  = quartile1 - half_factor * iqr
      high_inner = quartile3 + half_factor * iqr
      high_outer = quartile3 + factor * iqr
      result = Hash.new(0)
      @elements.each do |t|
        # The first matching interval wins; all bounds are inclusive.
        extreme =
          if t <= low_outer
            :very_low
          elsif low_outer <= t && t <= low_inner
            :low
          elsif high_inner <= t && t <= high_outer
            :high
          elsif t >= high_outer
            :very_high
          end
        extreme and result[extreme] += 1
      end
      unless result.empty?
        result[:median] = median
        result[:iqr] = iqr
        result[:factor] = factor
        result
      end
    end

    # Returns the LinearRegression object for the equation a * x + b which
    # represents the line computed by the linear regression algorithm.
    def linear_regression
      @linear_regression ||= LinearRegression.new @elements
    end

    # Returns a Histogram instance with +bins+ as the number of bins for this
    # analysis' elements.
    def histogram(bins)
      Histogram.new(self, bins)
    end
  end
end
|
data/lib/more_math.rb
ADDED