more_math 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,62 @@
1
+ require 'more_math'
2
+
3
module MoreMath
  # A histogram gives an overview of a sequence's elements.
  class Histogram
    # Create a Histogram for the elements of +sequence+ with +bins+ bins.
    # +sequence+ has to respond to +empty?+, +min+, +max+, and +each+. The
    # histogram is computed once, during construction.
    def initialize(sequence, bins = 10)
      @sequence = sequence
      @bins     = bins
      @result   = compute
    end

    # Number of bins for this Histogram.
    attr_reader :bins

    # Return the computed histogram as an array of arrays, one tuple
    # (l, c, r) per bin: left edge, element count, right edge. The array is
    # empty if the sequence was empty.
    def to_a
      @result
    end

    # Display this histogram to +output+ (anything responding to +<<+), one
    # line per bin, labelled with the bin's midpoint and starting with the
    # highest bin. +width+ is the parameter for +prepare_display+. Nothing is
    # written for an empty histogram. Returns self.
    def display(output = $stdout, width = 50)
      prepare_display(width).each do |left, bar, right|
        output << "%11.5f -|%s\n" % [ (left + right) / 2.0, "*" * bar ]
      end
      self
    end

    private

    # Returns an array of tuples (l, c, r) where +l+ is the left bin edge, +c+
    # the +width+-normalized frequency count value, and +r+ the right bin
    # edge. +width+ is usually an integer number representing the width of a
    # histogram bar. The tuples are returned in reversed bin order.
    def prepare_display(width)
      # An empty histogram has no maximum count to normalize against.
      @result.empty? and return []
      rows   = @result.reverse
      factor = width.to_f / rows.map { |_, count, _| count }.max
      rows.map { |left, count, right| [ left, (count * factor).round, right ] }
    end

    # Computes the histogram and returns it as an array of tuples (l, c, r).
    def compute
      @sequence.empty? and return []
      last_r = -Infinity
      min    = @sequence.min
      max    = @sequence.max
      step   = (max - min) / bins.to_f
      Array.new(bins) do |i|
        l = min + i * step
        r = min + (i + 1) * step
        c = 0
        @sequence.each do |x|
          # A bin covers (last_r, r]; the last bin is open to the right, so
          # rounding of its right edge can never drop the maximum.
          x > last_r and (x <= r || i == bins - 1) and c += 1
        end
        last_r = r
        [ l, c, r ]
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ require 'more_math'
2
+
3
module MoreMath
  # This class computes a linear regression for the given image and domain data
  # sets.
  class LinearRegression
    # Computes the regression line a * x + b for the y values in +image+ and
    # the x values in +domain+ (by default the indices 0, 1, 2, ... of
    # +image+). Raises an ArgumentError if both have different sizes.
    def initialize(image, domain = (0...image.size).to_a)
      image.size != domain.size and raise ArgumentError,
        "image and domain have unequal sizes"
      @image, @domain = image, domain
      compute
    end

    # The image data as an array.
    attr_reader :image

    # The domain data as an array.
    attr_reader :domain

    # The slope of the line.
    attr_reader :a

    # The offset of the line.
    attr_reader :b

    # Return true if the slope of the underlying data (not the sample data
    # passed into the constructor of this LinearRegression instance) is likely
    # (with alpha level _alpha_) to be zero.
    def slope_zero?(alpha = 0.05)
      df = @image.size - 2
      return true if df <= 0 # not enough values to check
      t = tvalue(alpha)
      td = TDistribution.new df
      t.abs <= td.inverse_probability(1 - alpha.abs / 2.0).abs
    end

    # Returns the residues of this linear regression in relation to the given
    # domain and image, that is y - (a * x + b) for every (x, y) pair.
    def residues
      @domain.zip(@image).map { |x, y| y - (@a * x + @b) }
    end

    private

    # Computes slope and offset with the least squares method and stores them
    # in @a and @b. The x values are used exactly as given in the domain, so
    # that @b is the offset of the same line a * x + b that +residues+ and
    # +tvalue+ evaluate.
    def compute
      size = @image.size
      sum_xx = sum_xy = sum_x = sum_y = 0.0
      @domain.zip(@image) do |x, y|
        sum_xx += x ** 2
        sum_xy += x * y
        sum_x  += x
        sum_y  += y
      end
      @a = (size * sum_xy - sum_x * sum_y) / (size * sum_xx - sum_x ** 2)
      @b = (sum_y - @a * sum_x) / size
      self
    end

    # Returns the t value for the slope: the slope divided by its standard
    # error. Returns 0.0 if there are fewer than three data points or if the
    # value isn't a number. +alpha+ is accepted but not used here.
    def tvalue(alpha = 0.05)
      df = @image.size - 2
      return 0.0 if df <= 0
      sse_y = 0.0
      @domain.zip(@image) do |x, y|
        f_x = a * x + b
        sse_y += (y - f_x) ** 2
      end
      # The squared deviations of the x values have to be taken around the
      # mean of the domain, not around the mean of the image.
      mean_x = @domain.inject(0.0) { |s, x| s + x } / @domain.size
      sse_x = @domain.inject(0.0) { |s, x| s + (x - mean_x) ** 2 }
      t = a / (Math.sqrt(sse_y / df) / Math.sqrt(sse_x))
      t.nan? ? 0.0 : t
    end
  end
end
@@ -0,0 +1,66 @@
1
+ require 'more_math/exceptions'
2
+ require 'more_math'
3
+
4
module MoreMath
  # This class is used to find the root of a function with Newton's bisection
  # method.
  class NewtonBisection
    include MoreMath::Exceptions

    # Creates a NewtonBisection instance for +function+, a one-argument block.
    def initialize(&function)
      @function = function
    end

    # The function, passed into the constructor.
    attr_reader :function

    # Return a bracket around a root, starting from the initial +range+. The
    # method returns nil, if no such bracket around a root could be found after
    # +n+ tries with the scaling +factor+. An ArgumentError is raised, if the
    # start of +range+ isn't smaller than its end.
    def bracket(range = -1..1, n = 50, factor = 1.6)
      x1, x2 = range.first.to_f, range.last.to_f
      x1 >= x2 and raise ArgumentError, "bad initial range #{range}"
      f1, f2 = @function[x1], @function[x2]
      n.times do
        f1 * f2 < 0 and return x1..x2
        # Move the end with the smaller absolute function value outwards.
        if f1.abs < f2.abs
          f1 = @function[x1 += factor * (x1 - x2)]
        else
          f2 = @function[x2 += factor * (x2 - x1)]
        end
      end
      nil
    end

    # Find the root of function in +range+ and return it. If no +range+ is
    # given, one is determined with +bracket+. The method raises a
    # DivergentException, if no bracket could be determined, if the function
    # values at the range's ends don't have opposite signs, or if no root
    # could be found after +n+ tries and in the +epsilon+ environment. An
    # ArgumentError is raised, if the start of +range+ isn't smaller than its
    # end.
    def solve(range = nil, n = 1 << 16, epsilon = 1E-16)
      if range
        x1, x2 = range.first.to_f, range.last.to_f
        x1 >= x2 and raise ArgumentError, "bad initial range #{range}"
      elsif range = bracket
        x1, x2 = range.first, range.last
      else
        raise DivergentException, "bracket could not be determined"
      end
      f = @function[x1]
      fmid = @function[x2]
      f * fmid >= 0 and raise DivergentException, "root must be bracketed in #{range}"
      # Orient the search so that root sits on the side with the negative
      # function value and dx points towards the positive one.
      if f < 0
        dx   = x2 - x1
        root = x1
      else
        dx   = x1 - x2
        root = x2
      end
      n.times do
        xmid = root + (dx *= 0.5)
        fmid = @function[xmid]
        # <= (not <), so that a midpoint that hits the root exactly is the
        # value returned below instead of the previous estimate.
        fmid <= 0 and root = xmid
        (dx.abs < epsilon || fmid == 0) and return root
      end
      raise DivergentException, "too many iterations (#{n})"
    end
  end
end
@@ -0,0 +1,337 @@
1
+ require 'more_math'
2
+
3
module MoreMath
  # This class is used to contain elements and compute various statistical
  # values for them.
  class Sequence
    include Enumerable

    # Creates a Sequence for +elements+. The passed object itself (not a copy)
    # is frozen, because most of the computed values are memoized.
    def initialize(elements)
      @elements = elements
      @elements.freeze
    end

    # Returns the array of elements.
    attr_reader :elements

    # Calls the +block+ for every element of this Sequence.
    def each(&block)
      @elements.each(&block)
    end

    # Returns true if this sequence is empty, otherwise false.
    def empty?
      @elements.empty?
    end

    # Returns the number of elements, on which the analysis is based.
    def size
      @elements.size
    end

    # Returns the variance of the elements.
    def variance
      @variance ||= sum_of_squares / size
    end

    # Returns the sample_variance of the elements (0.0 for fewer than two
    # elements).
    def sample_variance
      @sample_variance ||= size > 1 ? sum_of_squares / (size - 1.0) : 0.0
    end

    # Returns the sum of squares (the sum of the squared deviations) of the
    # elements.
    def sum_of_squares
      @sum_of_squares ||= @elements.inject(0.0) { |s, t| s + (t - arithmetic_mean) ** 2 }
    end

    # Returns the standard deviation of the elements.
    def standard_deviation
      @standard_deviation ||= Math.sqrt(variance)
    end

    # Returns the standard deviation of the elements in percentage of the
    # arithmetic mean.
    def standard_deviation_percentage
      @standard_deviation_percentage ||= 100.0 * standard_deviation / arithmetic_mean
    end

    # Returns the sample standard deviation of the elements.
    def sample_standard_deviation
      @sample_standard_deviation ||= Math.sqrt(sample_variance)
    end

    # Returns the sample standard deviation of the elements in percentage
    # of the arithmetic mean.
    def sample_standard_deviation_percentage
      @sample_standard_deviation_percentage ||= 100.0 * sample_standard_deviation / arithmetic_mean
    end

    # Returns the sum of all elements.
    def sum
      @sum ||= @elements.inject(0.0) { |s, t| s + t }
    end

    # Returns the arithmetic mean of the elements.
    def arithmetic_mean
      @arithmetic_mean ||= sum / size
    end

    alias mean arithmetic_mean

    # Returns the harmonic mean of the elements. If any of the elements
    # is less than or equal to 0.0, this method returns NaN.
    def harmonic_mean
      @harmonic_mean ||= (
        sum = @elements.inject(0.0) { |s, t|
          if t > 0
            s + 1.0 / t
          else
            break nil
          end
        }
        sum ? size / sum : 0 / 0.0
      )
    end

    # Returns the geometric mean of the elements. The elements are visited in
    # order: the first element equal to 0 makes the result 0.0, the first
    # element less than 0 makes it NaN.
    def geometric_mean
      @geometric_mean ||= (
        sum = @elements.inject(0.0) { |s, t|
          case
          when t > 0
            s + Math.log(t)
          when t == 0
            break :null
          else
            break nil
          end
        }
        case sum
        when :null
          0.0
        when Float
          Math.exp(sum / size)
        else
          0 / 0.0
        end
      )
    end

    # Returns the minimum of the elements.
    def min
      @min ||= @elements.min
    end

    # Returns the maximum of the elements.
    def max
      @max ||= @elements.max
    end

    # Returns the +p+-percentile of the elements.
    # There are many methods to compute the percentile, this method uses
    # the weighted average at x_(n + 1)p, which allows p to be in 0...100
    # (excluding the 100). An ArgumentError is raised for any other +p+.
    def percentile(p = 50)
      (0...100).include?(p) or
        raise ArgumentError, "p = #{p}, but has to be in (0...100)"
      p /= 100.0
      @sorted ||= @elements.sort
      r = p * (@sorted.size + 1)
      r_i = r.to_i
      r_f = r - r_i
      if r_i >= 1
        result = @sorted[r_i - 1]
        if r_i < @sorted.size
          # Interpolate between the two neighbouring elements.
          result += r_f * (@sorted[r_i] - @sorted[r_i - 1])
        end
      else
        result = @sorted[0]
      end
      result
    end

    alias median percentile

    # Use an approximation of the Welch-Satterthwaite equation to compute the
    # degrees of freedom for Welch's t-test.
    def compute_welch_df(other)
      (sample_variance / size + other.sample_variance / other.size) ** 2 / (
        (sample_variance ** 2 / (size ** 2 * (size - 1))) +
        (other.sample_variance ** 2 / (other.size ** 2 * (other.size - 1))))
    end

    # Returns the t value of the Welch's t-test between this Sequence
    # instance and the +other+.
    def t_welch(other)
      signal = arithmetic_mean - other.arithmetic_mean
      noise = Math.sqrt(sample_variance / size +
        other.sample_variance / other.size)
      signal / noise
    rescue Errno::EDOM
      0.0
    end

    # Returns an estimation of the common standard deviation of the
    # elements of this and +other+.
    def common_standard_deviation(other)
      Math.sqrt(common_variance(other))
    end

    # Returns an estimation of the common (pooled) variance of the elements
    # of this and +other+: both sample variances weighted by their degrees of
    # freedom and divided by the combined degrees of freedom.
    def common_variance(other)
      ((size - 1) * sample_variance + (other.size - 1) * other.sample_variance) /
        (size + other.size - 2)
    end

    # Compute the # degrees of freedom for Student's t-test.
    def compute_student_df(other)
      size + other.size - 2
    end

    # Returns the t value of the Student's t-test between this Sequence
    # instance and the +other+.
    def t_student(other)
      signal = arithmetic_mean - other.arithmetic_mean
      noise = common_standard_deviation(other) *
        Math.sqrt(1.0 / size + 1.0 / other.size)
      signal / noise
    rescue Errno::EDOM
      0.0
    end

    # Compute a sample size, that will more likely yield a mean difference
    # between this instance's elements and those of +other+. Use +alpha+
    # and +beta+ as levels for the first- and second-order errors.
    def suggested_sample_size(other, alpha = 0.05, beta = 0.05)
      alpha, beta = alpha.abs, beta.abs
      signal = arithmetic_mean - other.arithmetic_mean
      df = size + other.size - 2
      pooled_variance_estimate = (sum_of_squares + other.sum_of_squares) / df
      td = TDistribution.new df
      (((td.inverse_probability(alpha) + td.inverse_probability(beta)) *
        Math.sqrt(pooled_variance_estimate)) / signal) ** 2
    end

    # Return true, if the Sequence instance covers the +other+, that is their
    # arithmetic mean value is most likely to be equal for the +alpha+ error
    # level.
    def cover?(other, alpha = 0.05)
      t = t_welch(other)
      td = TDistribution.new(compute_welch_df(other))
      t.abs < td.inverse_probability(1 - alpha.abs / 2.0)
    end

    # Return the confidence interval for the arithmetic mean with alpha level +alpha+ of
    # the elements of this Sequence instance as a Range object.
    def confidence_interval(alpha = 0.05)
      td = TDistribution.new(size - 1)
      t = td.inverse_probability(alpha / 2).abs
      delta = t * sample_standard_deviation / Math.sqrt(size)
      (arithmetic_mean - delta)..(arithmetic_mean + delta)
    end

    # Returns the array of autovariances (of length size - 1).
    def autovariance
      Array.new(size - 1) do |k|
        s = 0.0
        0.upto(size - k - 1) do |i|
          s += (@elements[i] - arithmetic_mean) * (@elements[i + k] - arithmetic_mean)
        end
        s / size
      end
    end

    # Returns the array of autocorrelation values c_k / c_0 (of length size -
    # 1).
    def autocorrelation
      c = autovariance
      Array.new(c.size) { |k| c[k] / c[0] }
    end

    # Returns the d-value for the Durbin-Watson statistic. The value is d << 2
    # for positive, d >> 2 for negative and d around 2 for no autocorrelation.
    def durbin_watson_statistic
      e = linear_regression.residues
      e.size <= 1 and return 2.0
      (1...e.size).inject(0.0) { |s, i| s + (e[i] - e[i - 1]) ** 2 } /
        e.inject(0.0) { |s, x| s + x ** 2 }
    end

    # Returns the q value of the Ljung-Box statistic for the number of lags
    # +lags+. A higher value might indicate autocorrelation in the elements of
    # this Sequence instance. This method returns nil if there weren't enough
    # (at least lags) lags available.
    def ljung_box_statistic(lags = 20)
      r = autocorrelation
      lags >= r.size and return
      n = size
      n * (n + 2) * (1..lags).inject(0.0) { |s, i| s + r[i] ** 2 / (n - i) }
    end

    # This method tries to detect autocorrelation with the Ljung-Box
    # statistic. If enough lags can be considered it returns a hash with
    # results, otherwise nil is returned. The keys are
    # :lags:: the number of lags,
    # :alpha_level:: the alpha level for the test,
    # :q:: the value of the ljung_box_statistic,
    # :p:: the value the chi-square distribution's +probability+ returns for q,
    # :detected:: true if a correlation was found, that is p >= 1 - alpha_level.
    def detect_autocorrelation(lags = 20, alpha_level = 0.05)
      if q = ljung_box_statistic(lags)
        p = ChiSquareDistribution.new(lags).probability(q)
        return {
          :lags         => lags,
          :alpha_level  => alpha_level,
          :q            => q,
          :p            => p,
          :detected     => p >= 1 - alpha_level,
        }
      end
    end

    # Return a result hash with the number of :very_low, :low, :high, and
    # :very_high outliers, determined by the box plotting algorithm run with
    # :median and :iqr parameters. If no outliers were found or the iqr is
    # less than epsilon, nil is returned.
    #
    # An element is :low or :high, if it is between factor / 2 and factor
    # interquartile ranges below the first or above the third quartile; it is
    # :very_low or :very_high, if it is even farther away.
    def detect_outliers(factor = 3.0, epsilon = 1E-5)
      half_factor = factor / 2.0
      quartile1 = percentile(25)
      quartile3 = percentile(75)
      iqr = quartile3 - quartile1
      iqr < epsilon and return
      result = @elements.inject(Hash.new(0)) do |h, t|
        extreme =
          case t
          when -Infinity..(quartile1 - factor * iqr)
            :very_low
          when (quartile1 - factor * iqr)..(quartile1 - half_factor * iqr)
            :low
          when (quartile3 + half_factor * iqr)..(quartile3 + factor * iqr)
            :high
          when (quartile3 + factor * iqr)..Infinity
            :very_high
          end and h[extreme] += 1
        h
      end
      unless result.empty?
        result[:median] = median
        result[:iqr] = iqr
        result[:factor] = factor
        result
      end
    end

    # Returns the LinearRegression object for the equation a * x + b which
    # represents the line computed by the linear regression algorithm.
    def linear_regression
      @linear_regression ||= LinearRegression.new @elements
    end

    # Returns a Histogram instance with +bins+ as the number of bins for this
    # analysis' elements.
    def histogram(bins = 10)
      Histogram.new(self, bins)
    end
  end
end
@@ -0,0 +1,8 @@
1
module MoreMath
  # MoreMath version
  VERSION       = '0.0.0'
  # The version split at the dots, with every part converted to an integer.
  VERSION_ARRAY = VERSION.split('.').map(&:to_i) # :nodoc:
  # Major, minor and build number, in this order.
  VERSION_MAJOR, VERSION_MINOR, VERSION_BUILD = VERSION_ARRAY # :nodoc:
end
data/lib/more_math.rb ADDED
@@ -0,0 +1,9 @@
1
module MoreMath
  # Refers to floating point infinity.
  Infinity = 1.0 / 0

  # Require every ruby file found below the more_math directory that is
  # located next to this file. The glob is evaluated inside of that directory,
  # so the names it yields are relative to it; they are required with the
  # more_math/ prefix and without their .rb extension.
  library_dir = File.join(File.dirname(__FILE__), 'more_math')
  Dir.chdir(library_dir) do
    Dir['**/*.rb'].each do |filename|
      feature = filename.gsub(/\.rb\Z/, '')
      require File.join('more_math', feature)
    end
  end
end
data/make_doc.rb ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Build the rdoc documentation for all ruby files below lib and the README.
puts "Creating documentation."
sources = Dir['lib/**/*.rb'].join(' ')
system "rdoc --main README --title 'MoreMath -- More Math in Ruby' -d #{sources} README"