more_math 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
+ require 'more_math'
2
+
3
module MoreMath
  # A histogram gives an overview of a sequence's elements by counting how
  # many of them fall into each of +bins+ equally wide intervals between the
  # smallest and the largest element.
  class Histogram
    # Create a Histogram for the elements of +sequence+ with +bins+ bins.
    #
    # +sequence+ has to respond to +empty?+, +min+, +max+, and +each+ (an
    # Array of numbers works). The histogram is computed immediately.
    def initialize(sequence, bins = 10)
      @sequence = sequence
      @bins     = bins
      @result   = compute
    end

    # Number of bins for this Histogram.
    attr_reader :bins

    # Return the computed histogram as an array of tuples (l, c, r), where
    # +l+ is the left bin edge, +c+ the number of elements in the bin, and
    # +r+ the right bin edge. The array is empty for an empty sequence.
    def to_a
      @result
    end

    # Display this histogram to +output+ (anything that responds to +<<+),
    # one line per bin, starting with the highest bin. +width+ is the
    # parameter for +prepare_display+, that is the length of the longest bar.
    # Nothing is written for an empty histogram. Returns self.
    def display(output = $stdout, width = 50)
      prepare_display(width).each do |left, bar, right|
        output << "%11.5f -|%s\n" % [ (left + right) / 2.0, "*" * bar ]
      end
      self
    end

    private

    # Returns an array of tuples (l, c, r) where +l+ is the left bin edge, +c+
    # the +width+-normalized frequency count value, and +r+ the right bin
    # edge. +width+ is usually an integer number representing the width of a
    # histogram bar. The tuples are ordered from the highest to the lowest
    # bin.
    def prepare_display(width)
      # An empty histogram has no maximum count to normalize with.
      return [] if @result.empty?
      rows   = @result.reverse
      factor = width.to_f / rows.transpose[1].max
      rows.map { |left, count, right| [ left, (count * factor).round, right ] }
    end

    # Computes the histogram and returns it as an array of tuples (l, c, r).
    def compute
      return [] if @sequence.empty?
      min  = @sequence.min
      max  = @sequence.max
      step = (max - min) / bins.to_f
      last_right = -Infinity
      Array.new(bins) do |i|
        left     = min + i * step
        right    = min + (i + 1) * step
        last_bin = i == bins - 1
        count    = 0
        @sequence.each do |x|
          # Bins are half-open (last_right, right]; the last bin takes every
          # remaining element, so rounding of the upper edge cannot drop the
          # maximum.
          count += 1 if x > last_right && (x <= right || last_bin)
        end
        last_right = right
        [ left, count, right ]
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ require 'more_math'
2
+
3
module MoreMath
  # This class computes a linear regression for the given image and domain data
  # sets, i. e. the line a * x + b that minimizes the squared residues.
  class LinearRegression
    # Computes the regression of the +image+ values (y) over the +domain+
    # values (x). +domain+ defaults to the indices 0, 1, ..., image.size - 1.
    # Raises an ArgumentError if +image+ and +domain+ differ in size.
    def initialize(image, domain = (0...image.size).to_a)
      unless image.size == domain.size
        raise ArgumentError, "image and domain have unequal sizes"
      end
      @image, @domain = image, domain
      compute
    end

    # The image data as an array.
    attr_reader :image

    # The domain data as an array.
    attr_reader :domain

    # The slope of the line.
    attr_reader :a

    # The offset of the line.
    attr_reader :b

    # Return true if the slope of the underlying data (not the sample data
    # passed into the constructor of this LinearRegression instance) is likely
    # (with alpha level _alpha_) to be zero. With fewer than three data points
    # there are no degrees of freedom left to test, so true is returned.
    def slope_zero?(alpha = 0.05)
      df = @image.size - 2
      return true if df <= 0 # not enough values to check
      t  = tvalue(alpha)
      td = TDistribution.new df
      t.abs <= td.inverse_probability(1 - alpha.abs / 2.0).abs
    end

    # Returns the residues of this linear regression in relation to the given
    # domain and image, that is y - (a * x + b) for every data point.
    def residues
      @domain.zip(@image).map { |x, y| y - (@a * x + @b) }
    end

    private

    # Computes slope @a and offset @b with the least squares method. The sums
    # are taken over the domain values exactly as given, so that @b is the
    # value of the line at x = 0 and agrees with +residues+. Returns self.
    def compute
      size = @image.size
      sum_xx = sum_xy = sum_x = sum_y = 0.0
      @domain.zip(@image) do |x, y|
        sum_xx += x ** 2
        sum_xy += x * y
        sum_x  += x
        sum_y  += y
      end
      @a = (size * sum_xy - sum_x * sum_y) / (size * sum_xx - sum_x ** 2)
      @b = (sum_y - @a * sum_x) / size
      self
    end

    # Returns the t value of the slope, i. e. the slope divided by its
    # standard error, or 0.0 if it cannot be computed. +alpha+ is not used
    # in the computation; it is only accepted for compatibility.
    def tvalue(alpha = 0.05)
      df = @image.size - 2
      return 0.0 if df <= 0
      sse_y = 0.0
      @domain.zip(@image) do |x, y|
        f_x = a * x + b
        sse_y += (y - f_x) ** 2
      end
      # The spread of the domain has to be measured around the mean of the
      # domain values, not around the mean of the image values.
      domain_mean = @domain.inject(0.0) { |s, x| s + x } / @domain.size
      sse_x = @domain.inject(0.0) { |s, x| s + (x - domain_mean) ** 2 }
      t = a / (Math.sqrt(sse_y / df) / Math.sqrt(sse_x))
      t.nan? ? 0.0 : t
    end
  end
end
@@ -0,0 +1,66 @@
1
+ require 'more_math/exceptions'
2
+ require 'more_math'
3
+
4
module MoreMath
  # Root finder for a one-argument function, based on repeatedly halving an
  # interval that brackets the root (bisection).
  class NewtonBisection
    include MoreMath::Exceptions

    # Creates a NewtonBisection instance for +function+, a one-argument block.
    def initialize(&function)
      @function = function
    end

    # The function, passed into the constructor.
    attr_reader :function

    # Tries to find an interval around a root by widening the initial +range+.
    # In each of at most +n+ rounds the border with the smaller absolute
    # function value is moved outwards by +factor+ times the current interval
    # length. Returns the bracketing Range as soon as the function values at
    # both borders have opposite signs, or nil if that never happens. Raises
    # an ArgumentError if +range+ isn't ascending.
    def bracket(range = -1..1, n = 50, factor = 1.6)
      lower = range.first.to_f
      upper = range.last.to_f
      raise ArgumentError, "bad initial range #{range}" if lower >= upper
      f_lower = @function[lower]
      f_upper = @function[upper]
      n.times do
        return lower..upper if f_lower * f_upper < 0
        if f_lower.abs < f_upper.abs
          lower += factor * (lower - upper)
          f_lower = @function[lower]
        else
          upper += factor * (upper - lower)
          f_upper = @function[upper]
        end
      end
      nil
    end

    # Searches the root of the function inside of +range+ and returns it. If
    # no +range+ was given, +bracket+ is asked for one. A DivergentException
    # is raised if no bracket could be determined, if +range+ doesn't bracket
    # a root, or if after +n+ halvings neither the step size dropped below
    # +epsilon+ nor the function value became exactly zero. An ArgumentError
    # is raised if a given +range+ isn't ascending.
    def solve(range = nil, n = 1 << 16, epsilon = 1E-16)
      if range
        lower = range.first.to_f
        upper = range.last.to_f
        raise ArgumentError, "bad initial range #{range}" if lower >= upper
      else
        range = bracket
        raise DivergentException, "bracket could not be determined" unless range
        lower = range.first
        upper = range.last
      end
      f_lower = @function[lower]
      f_upper = @function[upper]
      if f_lower * f_upper >= 0
        raise DivergentException, "root must be bracketed in #{range}"
      end
      # Start at the border with the negative function value and step towards
      # the other border, so that root always stays on the negative side.
      if f_lower < 0
        root = lower
        dx   = upper - lower
      else
        root = upper
        dx   = lower - upper
      end
      n.times do
        dx *= 0.5
        xmid = root + dx
        fmid = @function[xmid]
        root = xmid if fmid < 0
        return root if dx.abs < epsilon || fmid == 0
      end
      raise DivergentException, "too many iterations (#{n})"
    end
  end
end
@@ -0,0 +1,337 @@
1
+ require 'more_math'
2
+
3
module MoreMath
  # This class is used to contain elements and compute various statistical
  # values for them. Most values are computed on first use and memoized
  # afterwards.
  class Sequence
    # Creates a Sequence for +elements+, an array of numbers. Note that the
    # passed object itself is frozen (not a copy of it), because the memoized
    # results would become stale if the elements changed later on.
    def initialize(elements)
      @elements = elements
      @elements.freeze
    end

    # Returns the array of elements.
    attr_reader :elements

    # Calls the +block+ for every element of this Sequence.
    def each(&block)
      @elements.each(&block)
    end
    include Enumerable

    # Returns true if this sequence is empty, otherwise false.
    def empty?
      @elements.empty?
    end

    # Returns the number of elements, on which the analysis is based.
    def size
      @elements.size
    end

    # Returns the variance of the elements.
    def variance
      @variance ||= sum_of_squares / size
    end

    # Returns the sample_variance of the elements, or 0.0 if there are fewer
    # than two elements.
    def sample_variance
      @sample_variance ||= size > 1 ? sum_of_squares / (size - 1.0) : 0.0
    end

    # Returns the sum of squares (the sum of the squared deviations) of the
    # elements.
    def sum_of_squares
      @sum_of_squares ||= @elements.inject(0.0) { |s, t| s + (t - arithmetic_mean) ** 2 }
    end

    # Returns the standard deviation of the elements.
    def standard_deviation
      @standard_deviation ||= Math.sqrt(variance)
    end

    # Returns the standard deviation of the elements in percentage of the
    # arithmetic mean.
    def standard_deviation_percentage
      @standard_deviation_percentage ||= 100.0 * standard_deviation / arithmetic_mean
    end

    # Returns the sample standard deviation of the elements.
    def sample_standard_deviation
      @sample_standard_deviation ||= Math.sqrt(sample_variance)
    end

    # Returns the sample standard deviation of the elements in percentage
    # of the arithmetic mean.
    def sample_standard_deviation_percentage
      @sample_standard_deviation_percentage ||= 100.0 * sample_standard_deviation / arithmetic_mean
    end

    # Returns the sum of all elements (as a Float).
    def sum
      @sum ||= @elements.inject(0.0) { |s, t| s + t }
    end

    # Returns the arithmetic mean of the elements.
    def arithmetic_mean
      @arithmetic_mean ||= sum / size
    end

    alias mean arithmetic_mean

    # Returns the harmonic mean of the elements. If any of the elements
    # is less than or equal to 0.0, this method returns NaN.
    def harmonic_mean
      @harmonic_mean ||= (
        reciprocal_sum = @elements.inject(0.0) { |s, t|
          if t > 0
            s + 1.0 / t
          else
            break nil
          end
        }
        reciprocal_sum ? size / reciprocal_sum : 0 / 0.0
      )
    end

    # Returns the geometric mean of the elements. If any of the
    # elements is less than 0.0, this method returns NaN. If a zero is met
    # before any negative element, 0.0 is returned.
    def geometric_mean
      @geometric_mean ||= (
        log_sum = @elements.inject(0.0) { |s, t|
          case
          when t > 0
            s + Math.log(t)
          when t == 0
            break :null
          else
            break nil
          end
        }
        case log_sum
        when :null
          0.0
        when Float
          Math.exp(log_sum / size)
        else
          0 / 0.0
        end
      )
    end

    # Returns the minimum of the elements.
    def min
      @min ||= @elements.min
    end

    # Returns the maximum of the elements.
    def max
      @max ||= @elements.max
    end

    # Returns the +p+-percentile of the elements.
    # There are many methods to compute the percentile, this method uses the
    # weighted average at x_(n + 1)p, which allows p to be in 0...100
    # (excluding the 100). Raises an ArgumentError for any other +p+.
    def percentile(p = 50)
      unless (0...100).include?(p)
        raise ArgumentError, "p = #{p}, but has to be in (0...100)"
      end
      p /= 100.0
      @sorted ||= @elements.sort
      r = p * (@sorted.size + 1)
      r_i = r.to_i
      r_f = r - r_i
      if r_i >= 1
        result = @sorted[r_i - 1]
        if r_i < @sorted.size
          # Interpolate linearly between the two neighbouring elements.
          result += r_f * (@sorted[r_i] - @sorted[r_i - 1])
        end
      else
        result = @sorted[0]
      end
      result
    end

    alias median percentile

    # Use an approximation of the Welch-Satterthwaite equation to compute the
    # degrees of freedom for Welch's t-test.
    def compute_welch_df(other)
      (sample_variance / size + other.sample_variance / other.size) ** 2 / (
        (sample_variance ** 2 / (size ** 2 * (size - 1))) +
        (other.sample_variance ** 2 / (other.size ** 2 * (other.size - 1))))
    end

    # Returns the t value of the Welch's t-test between this Sequence
    # instance and the +other+.
    def t_welch(other)
      signal = arithmetic_mean - other.arithmetic_mean
      noise = Math.sqrt(sample_variance / size +
        other.sample_variance / other.size)
      signal / noise
    rescue Errno::EDOM
      0.0
    end

    # Returns an estimation of the common standard deviation of the
    # elements of this and +other+.
    def common_standard_deviation(other)
      Math.sqrt(common_variance(other))
    end

    # Returns an estimation of the common (pooled) variance of the elements of
    # this and +other+: both sample variances weighted by their degrees of
    # freedom and divided by the total degrees of freedom.
    def common_variance(other)
      ((size - 1) * sample_variance + (other.size - 1) * other.sample_variance) /
        (size + other.size - 2)
    end

    # Compute the # degrees of freedom for Student's t-test.
    def compute_student_df(other)
      size + other.size - 2
    end

    # Returns the t value of the Student's t-test between this Sequence
    # instance and the +other+, that is the difference of the means divided
    # by the pooled standard error.
    def t_student(other)
      signal = arithmetic_mean - other.arithmetic_mean
      noise = common_standard_deviation(other) *
        Math.sqrt(1.0 / size + 1.0 / other.size)
      signal / noise
    rescue Errno::EDOM
      0.0
    end

    # Compute a sample size, that will more likely yield a mean difference
    # between this instance's elements and those of +other+. Use +alpha+
    # and +beta+ as levels for the first- and second-order errors.
    def suggested_sample_size(other, alpha = 0.05, beta = 0.05)
      alpha, beta = alpha.abs, beta.abs
      signal = arithmetic_mean - other.arithmetic_mean
      df = size + other.size - 2
      pooled_variance_estimate = (sum_of_squares + other.sum_of_squares) / df
      td = TDistribution.new df
      (((td.inverse_probability(alpha) + td.inverse_probability(beta)) *
        Math.sqrt(pooled_variance_estimate)) / signal) ** 2
    end

    # Return true, if the Sequence instance covers the +other+, that is their
    # arithmetic mean value is most likely to be equal for the +alpha+ error
    # level.
    def cover?(other, alpha = 0.05)
      t = t_welch(other)
      td = TDistribution.new(compute_welch_df(other))
      t.abs < td.inverse_probability(1 - alpha.abs / 2.0)
    end

    # Return the confidence interval for the arithmetic mean with alpha level +alpha+ of
    # the elements of this Sequence instance as a Range object.
    def confidence_interval(alpha = 0.05)
      td = TDistribution.new(size - 1)
      t = td.inverse_probability(alpha / 2).abs
      delta = t * sample_standard_deviation / Math.sqrt(size)
      (arithmetic_mean - delta)..(arithmetic_mean + delta)
    end

    # Returns the array of autovariances (of length size - 1). The array is
    # empty if there are fewer than two elements.
    def autovariance
      Array.new([ size - 1, 0 ].max) do |k|
        s = 0.0
        0.upto(size - k - 1) do |i|
          s += (@elements[i] - arithmetic_mean) * (@elements[i + k] - arithmetic_mean)
        end
        s / size
      end
    end

    # Returns the array of autocorrelation values c_k / c_0 (of length size -
    # 1).
    def autocorrelation
      c = autovariance
      Array.new(c.size) { |k| c[k] / c[0] }
    end

    # Returns the d-value for the Durbin-Watson statistic. The value is d << 2
    # for positive, d >> 2 for negative and d around 2 for no autocorrelation.
    def durbin_watson_statistic
      e = linear_regression.residues
      return 2.0 if e.size <= 1
      (1...e.size).inject(0.0) { |s, i| s + (e[i] - e[i - 1]) ** 2 } /
        e.inject(0.0) { |s, x| s + x ** 2 }
    end

    # Returns the q value of the Ljung-Box statistic for the number of lags
    # +lags+. A higher value might indicate autocorrelation in the elements of
    # this Sequence instance. This method returns nil if there weren't enough
    # (at least lags) lags available.
    def ljung_box_statistic(lags = 20)
      r = autocorrelation
      return if lags >= r.size
      n = size
      n * (n + 2) * (1..lags).inject(0.0) { |s, i| s + r[i] ** 2 / (n - i) }
    end

    # This method tries to detect autocorrelation with the Ljung-Box
    # statistic. If enough lags can be considered it returns a hash with
    # results, otherwise nil is returned. The keys are
    # :lags:: the number of lags,
    # :alpha_level:: the alpha level for the test,
    # :q:: the value of the ljung_box_statistic,
    # :p:: the value ChiSquareDistribution#probability returned for q,
    # :detected:: true if a correlation was found, that is if p is at least
    #             1 - alpha_level.
    def detect_autocorrelation(lags = 20, alpha_level = 0.05)
      if q = ljung_box_statistic(lags)
        p = ChiSquareDistribution.new(lags).probability(q)
        return {
          :lags         => lags,
          :alpha_level  => alpha_level,
          :q            => q,
          :p            => p,
          :detected     => p >= 1 - alpha_level,
        }
      end
    end

    # Return a result hash with the number of :very_low, :low, :high, and
    # :very_high outliers, determined by the box plotting algorithm run with
    # :median and :iqr parameters. Elements further than +factor+ times the
    # iqr below the first or above the third quartile count as :very_low or
    # :very_high, elements further than half of that as :low or :high. If no
    # outliers were found or the iqr is less than epsilon, nil is returned.
    def detect_outliers(factor = 3.0, epsilon = 1E-5)
      half_factor = factor / 2.0
      quartile1 = percentile(25)
      quartile3 = percentile(75)
      iqr = quartile3 - quartile1
      return if iqr < epsilon
      very_low_limit  = quartile1 - factor * iqr
      low_limit       = quartile1 - half_factor * iqr
      high_limit      = quartile3 + half_factor * iqr
      very_high_limit = quartile3 + factor * iqr
      result = Hash.new(0)
      @elements.each do |t|
        extreme =
          case t
          when -Infinity..very_low_limit
            :very_low
          when very_low_limit..low_limit
            :low
          when high_limit..very_high_limit
            :high
          when very_high_limit..Infinity
            :very_high
          end
        result[extreme] += 1 if extreme
      end
      unless result.empty?
        result[:median] = median
        result[:iqr] = iqr
        result[:factor] = factor
        result
      end
    end

    # Returns the LinearRegression object for the equation a * x + b which
    # represents the line computed by the linear regression algorithm.
    def linear_regression
      @linear_regression ||= LinearRegression.new @elements
    end

    # Returns a Histogram instance with +bins+ as the number of bins for this
    # analysis' elements.
    def histogram(bins)
      Histogram.new(self, bins)
    end
  end
end
@@ -0,0 +1,8 @@
1
module MoreMath
  # MoreMath version
  VERSION = '0.0.0'
  # The numeric components of VERSION, in the order major, minor, build.
  VERSION_ARRAY = VERSION.split('.').map { |part| part.to_i } # :nodoc:
  VERSION_MAJOR = VERSION_ARRAY.at(0) # :nodoc:
  VERSION_MINOR = VERSION_ARRAY.at(1) # :nodoc:
  VERSION_BUILD = VERSION_ARRAY.at(2) # :nodoc:
end
data/lib/more_math.rb ADDED
@@ -0,0 +1,9 @@
1
module MoreMath
  Infinity = 1.0 / 0 # Refers to floating point infinity.

  # Require every Ruby file below the more_math directory next to this file
  # under its load path relative name (more_math/...). The files are located
  # with a glob on the directory instead of changing into it, so that the
  # working directory of the process is left alone while they are loaded, and
  # they are required in sorted order to make loading deterministic.
  source_dir = File.join(File.dirname(__FILE__), 'more_math')
  Dir[File.join(source_dir, '**', '*.rb')].sort.each do |path|
    # Strip the directory prefix (and the following slash) and the extension.
    feature = path[(source_dir.size + 1)..-1].sub(/\.rb\z/, '')
    require File.join('more_math', feature)
  end
end
data/make_doc.rb ADDED
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby

# Builds the rdoc documentation for all library files and the README.

puts "Creating documentation."
# Pass the arguments as a list, so that no shell is involved and file names
# containing spaces or shell metacharacters reach rdoc unchanged.
arguments = [ '--main', 'README', '--title', 'MoreMath -- More Math in Ruby', '-d' ]
arguments.concat Dir['lib/**/*.rb']
arguments << 'README'
system('rdoc', *arguments) or abort "Creating documentation failed."