linefit 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/README +1 -1
  2. data/lib/linefit.rb +501 -501
  3. metadata +9 -8
  4. checksums.yaml +0 -7
data/README CHANGED
@@ -237,7 +237,7 @@ METHODS
237
237
  do multiple line fits using the same weights, the weights must be passed
238
238
  to each call to setData().
239
239
 
240
- The method will return flase if the array lengths don't match, there are
240
+ The method will return false if the array lengths don't match, there are
241
241
  less than two data points, any weights are negative or less than two of
242
242
  the weights are nonzero. If the new() method was called with validate =
243
243
  1, the method will also verify that the data and weights are valid
@@ -1,501 +1,501 @@
1
- # == Synopsis
2
- #
3
- # Weighted or unweighted least-squares line fitting to two-dimensional data (y = a + b * x).
4
- # (This is also called linear regression.)
5
- #
6
- # == Usage
7
- #
8
- # x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18]
9
- # y = [4039,4057,4052,4094,4104,4110,4154,4161,4186,4195,4229,4244,4242,4283,4322,4333,4368,4389]
10
- #
11
- # linefit = LineFit.new
12
- # linefit.setData(x,y)
13
- #
14
- # intercept, slope = linefit.coefficients
15
- # rSquared = linefit.rSquared
16
- # meanSquaredError = linefit.meanSqError
17
- # durbinWatson = linefit.durbinWatson
18
- # sigma = linefit.sigma
19
- # tStatIntercept, tStatSlope = linefit.tStatistics
20
- # predictedYs = linefit.predictedYs
21
- # residuals = linefit.residuals
22
- # varianceIntercept, varianceSlope = linefit.varianceOfEstimates
23
- #
24
- # newX = 24
25
- # newY = linefit.forecast(newX)
26
- #
27
- # == Authors
28
- # Eric Cline, escline(at)gmail(dot)com, ( Ruby Port, LineFit#forecast )
29
- #
30
- #
31
- # Richard Anderson ( Statistics::LineFit Perl module )
32
- # http://search.cpan.org/~randerson/Statistics-LineFit-0.07
33
- #
34
- # == See Also
35
- # Mendenhall, W., and Sincich, T.L., 2003, A Second Course in Statistics:
36
- # Regression Analysis, 6th ed., Prentice Hall.
37
- # Press, W. H., Flannery, B. P., Teukolsky, S. A., Vetterling, W. T., 1992,
38
- # Numerical Recipes in C : The Art of Scientific Computing, 2nd ed.,
39
- # Cambridge University Press.
40
- #
41
- # == License
42
- # Licensed under the same terms as Ruby.
43
- #
44
-
45
- class LineFit
46
-
47
- ############################################################################
48
- # Create a LineFit object with the optional validate and hush parameters
49
- #
50
- # linefit = LineFit.new
51
- # linefit = LineFit.new(validate)
52
- # linefit = LineFit.new(validate, hush)
53
- #
54
- # validate = 1 -> Verify input data is numeric (slower execution)
55
- # = 0 -> Don't verify input data (default, faster execution)
56
- # hush = 1 -> Suppress error messages
57
- # = 0 -> Enable error messages (default)
58
-
59
- def initialize(validate = FALSE, hush = FALSE)
60
- @doneRegress = FALSE
61
- @gotData = FALSE
62
- @hush = hush
63
- @validate = validate
64
- end
65
-
66
- ############################################################################
67
- # Return the slope and intercept from least squares line fit
68
- #
69
- # intercept, slope = linefit.coefficients
70
- #
71
- # The returned list is undefined if the regression fails.
72
- #
73
- def coefficients
74
- self.regress unless (@intercept and @slope)
75
- return @intercept, @slope
76
- end
77
-
78
- ############################################################################
79
- # Return the Durbin-Watson statistic
80
- #
81
- # durbinWatson = linefit.durbinWatson
82
- #
83
- # The Durbin-Watson test is a test for first-order autocorrelation in the
84
- # residuals of a time series regression. The Durbin-Watson statistic has a
85
- # range of 0 to 4; a value of 2 indicates there is no autocorrelation.
86
- #
87
- # The return value is undefined if the regression fails. If weights are
88
- # input, the return value is the weighted Durbin-Watson statistic.
89
- #
90
- def durbinWatson
91
- unless @durbinWatson
92
- self.regress or return
93
- sumErrDiff = 0
94
- errorTMinus1 = @y[0] - (@intercept + @slope * @x[0])
95
- 1.upto(@numxy-1) do |i|
96
- error = @y[i] - (@intercept + @slope * @x[i])
97
- sumErrDiff += (error - errorTMinus1) ** 2
98
- errorTMinus1 = error
99
- end
100
- @durbinWatson = sumSqErrors() > 0 ? sumErrDiff / sumSqErrors() : 0
101
- end
102
- return @durbinWatson
103
- end
104
-
105
- ############################################################################
106
- # Return the mean squared error
107
- #
108
- # meanSquaredError = linefit.meanSqError
109
- #
110
- # The return value is undefined if the regression fails. If weights are
111
- # input, the return value is the weighted mean squared error.
112
- #
113
- def meanSqError
114
- unless @meanSqError
115
- self.regress or return
116
- @meanSqError = sumSqErrors() / @numxy
117
- end
118
- return @meanSqError
119
- end
120
-
121
- ############################################################################
122
- # Return the predicted Y values
123
- #
124
- # predictedYs = linefit.predictedYs
125
- #
126
- # The returned list is undefined if the regression fails.
127
- #
128
- def predictedYs
129
- unless @predictedYs
130
- self.regress or return
131
- @predictedYs = []
132
- 0.upto(@numxy-1) do |i|
133
- @predictedYs[i] = @intercept + @slope * @x[i]
134
- end
135
- end
136
- return @predictedYs
137
- end
138
-
139
- ############################################################################
140
- # Return the independent (Y) value, by using a dependent (X) value
141
- #
142
- # forecasted_y = linefit.forecast(x_value)
143
- #
144
- # Will use the slope and intercept to calculate the Y value along the line
145
- # at the x value. Note: value returned only as good as the line fit.
146
- #
147
- def forecast(x)
148
- self.regress unless (@intercept and @slope)
149
- return @slope * x + @intercept
150
- end
151
-
152
- ############################################################################
153
- # Do the least squares line fit (if not already done)
154
- #
155
- # linefit.regress
156
- #
157
- # You don't need to call this method because it is invoked by the other
158
- # methods as needed. After you call setData(), you can call regress() at
159
- # any time to get the status of the regression for the current data.
160
- #
161
- def regress
162
- return @regressOK if @doneRegress
163
- unless @gotData
164
- puts "No valid data input - can't do regression" unless @hush
165
- return FALSE
166
- end
167
- sumx, sumy, @sumxx, sumyy, sumxy = computeSums()
168
- @sumSqDevx = @sumxx - sumx ** 2 / @numxy
169
- if @sumSqDevx != 0
170
- @sumSqDevy = sumyy - sumy ** 2 / @numxy
171
- @sumSqDevxy = sumxy - sumx * sumy / @numxy
172
- @slope = @sumSqDevxy / @sumSqDevx
173
- @intercept = (sumy - @slope * sumx) / @numxy
174
- @regressOK = TRUE
175
- else
176
- puts "Can't fit line when x values are all equal" unless @hush
177
- @sumxx = @sumSqDevx = nil
178
- @regressOK = FALSE
179
- end
180
- @doneRegress = TRUE
181
- return @regressOK
182
- end
183
-
184
- ############################################################################
185
- # Return the predicted Y values minus the observed Y values
186
- #
187
- # residuals = linefit.residuals
188
- #
189
- # The returned list is undefined if the regression fails.
190
- #
191
- def residuals
192
- unless @residuals
193
- self.regress or return
194
- @residuals = []
195
- 0.upto(@numxy-1) do |i|
196
- @residuals[i] = @y[i] - (@intercept + @slope * @x[i])
197
- end
198
- end
199
- return @residuals
200
- end
201
-
202
- ############################################################################
203
- # Return the correlation coefficient
204
- #
205
- # rSquared = linefit.rSquared
206
- #
207
- # R squared, also called the square of the Pearson product-moment
208
- # correlation coefficient, is a measure of goodness-of-fit. It is the
209
- # fraction of the variation in Y that can be attributed to the variation
210
- # in X. A perfect fit will have an R squared of 1; fitting a line to the
211
- # vertices of a regular polygon will yield an R squared of zero. Graphical
212
- # displays of data with an R squared of less than about 0.1 do not show a
213
- # visible linear trend.
214
- #
215
- # The return value is undefined if the regression fails. If weights are
216
- # input, the return value is the weighted correlation coefficient.
217
- #
218
- def rSquared
219
- unless @rSquared
220
- self.regress or return
221
- denom = @sumSqDevx * @sumSqDevy
222
- @rSquared = denom != 0 ? @sumSqDevxy ** 2 / denom : 1
223
- end
224
- return @rSquared
225
- end
226
-
227
- ############################################################################
228
- # Initialize (x,y) values and optional weights
229
- #
230
- # lineFit.setData(x, y)
231
- # lineFit.setData(x, y, weights)
232
- # lineFit.setData(xy)
233
- # lineFit.setData(xy, weights)
234
- #
235
- # xy is an array of arrays; x values are xy[i][0], y values are
236
- # xy[i][1]. The method identifies the difference between the first
237
- # and fourth calling signatures by examining the first argument.
238
- #
239
- # The optional weights array must be the same length as the data array(s).
240
- # The weights must be non-negative numbers; at least two of the weights
241
- # must be nonzero. Only the relative size of the weights is significant:
242
- # the program normalizes the weights (after copying the input values) so
243
- # that the sum of the weights equals the number of points. If you want to
244
- # do multiple line fits using the same weights, the weights must be passed
245
- # to each call to setData().
246
- #
247
- # The method will return flase if the array lengths don't match, there are
248
- # less than two data points, any weights are negative or less than two of
249
- # the weights are nonzero. If the new() method was called with validate =
250
- # 1, the method will also verify that the data and weights are valid
251
- # numbers. Once you successfully call setData(), the next call to any
252
- # method other than new() or setData() invokes the regression.
253
- #
254
- def setData(x, y, weights = nil)
255
- @doneRegress = FALSE
256
- @x = @y = @numxy = @weight = \
257
- @intercept = @slope = @rSquared = \
258
- @sigma = @durbinWatson = @meanSqError = \
259
- @sumSqErrors = @tStatInt = @tStatSlope = \
260
- @predictedYs = @residuals = @sumxx = \
261
- @sumSqDevx = @sumSqDevy = @sumSqDevxy = nil
262
- if x.length < 2
263
- puts "Must input more than one data point!" unless @hush
264
- return FALSE
265
- end
266
- if x[0].class == Array
267
- @numxy = x[0].length
268
- setWeights(y) or return FALSE
269
- @x = []
270
- @y = []
271
- x.each do |xy|
272
- @x = xy[0]
273
- @y = xy[1]
274
- end
275
- else
276
- if x.length != y.length
277
- puts "Length of x and y arrays must be equal!" unless @hush
278
- return FALSE
279
- end
280
- @numxy = x.length
281
- setWeights(weights) or return FALSE
282
- @x = x
283
- @y = y
284
- end
285
- if @validate
286
- unless validData()
287
- @x = @y = @weights = @numxy = nil
288
- return FALSE
289
- end
290
- end
291
- @gotData = TRUE
292
- return TRUE
293
- end
294
-
295
- ############################################################################
296
- # Return the estimated homoscedastic standard deviation of the
297
- # error term
298
- #
299
- # sigma = linefit.sigma
300
- #
301
- # Sigma is an estimate of the homoscedastic standard deviation of the
302
- # error. Sigma is also known as the standard error of the estimate.
303
- #
304
- # The return value is undefined if the regression fails. If weights are
305
- # input, the return value is the weighted standard error.
306
- #
307
- def sigma
308
- unless @sigma
309
- self.regress or return
310
- @sigma = @numxy > 2 ? Math.sqrt(sumSqErrors() / (@numxy - 2)) : 0
311
- end
312
- return @sigma
313
- end
314
-
315
- ############################################################################
316
- # Return the T statistics
317
- #
318
- # tStatIntercept, tStatSlope = linefit.tStatistics
319
- #
320
- # The t statistic, also called the t ratio or Wald statistic, is used to
321
- # accept or reject a hypothesis using a table of cutoff values computed
322
- # from the t distribution. The t-statistic suggests that the estimated
323
- # value is (reasonable, too small, too large) when the t-statistic is
324
- # (close to zero, large and positive, large and negative).
325
- #
326
- # The returned list is undefined if the regression fails. If weights are
327
- # input, the returned values are the weighted t statistics.
328
- #
329
- def tStatistics
330
- unless (@tStatInt and @tStatSlope)
331
- self.regress or return
332
- biasEstimateInt = sigma() * Math.sqrt(@sumxx / (@sumSqDevx * @numxy))
333
- @tStatInt = biasEstimateInt != 0 ? @intercept / biasEstimateInt : 0
334
- biasEstimateSlope = sigma() / Math.sqrt(@sumSqDevx)
335
- @tStatSlope = biasEstimateSlope != 0 ? @slope / biasEstimateSlope : 0
336
- end
337
- return @tStatInt, @tStatSlope
338
- end
339
-
340
- ############################################################################
341
- # Return the variances in the estiamtes of the intercept and slope
342
- #
343
- # varianceIntercept, varianceSlope = linefit.varianceOfEstimates
344
- #
345
- # Assuming the data are noisy or inaccurate, the intercept and slope
346
- # returned by the coefficients() method are only estimates of the true
347
- # intercept and slope. The varianceofEstimate() method returns the
348
- # variances of the estimates of the intercept and slope, respectively. See
349
- # Numerical Recipes in C, section 15.2 (Fitting Data to a Straight Line),
350
- # equation 15.2.9.
351
- #
352
- # The returned list is undefined if the regression fails. If weights are
353
- # input, the returned values are the weighted variances.
354
- #
355
- def varianceOfEstimates
356
- unless @intercept and @slope
357
- self.regress or return
358
- end
359
- predictedYs = predictedYs()
360
- s = sx = sxx = 0
361
- if @weight
362
- 0.upto(@numxy-1) do |i|
363
- variance = (predictedYs[i] - @y[i]) ** 2
364
- unless variance == 0
365
- s += 1.0 / variance
366
- sx += @weight[i] * @x[i] / variance
367
- sxx += @weight[i] * @x[i] ** 2 / variance
368
- end
369
- end
370
- else
371
- 0.upto(@numxy-1) do |i|
372
- variance = (predictedYs[i] - @y[i]) ** 2
373
- unless variance == 0
374
- s += 1.0 / variance
375
- sx += @x[i] / variance
376
- sxx += @x[i] ** 2 / variance
377
- end
378
- end
379
- end
380
- denominator = (s * sxx - sx ** 2)
381
- if denominator == 0
382
- return
383
- else
384
- return sxx / denominator, s / denominator
385
- end
386
- end
387
-
388
- private
389
-
390
- ############################################################################
391
- # Compute sum of x, y, x**2, y**2, and x*y
392
- #
393
- def computeSums
394
- sumx = sumy = sumxx = sumyy = sumxy = 0
395
- if @weight
396
- 0.upto(@numxy-1) do |i|
397
- sumx += @weight[i] * @x[i]
398
- sumy += @weight[i] * @y[i]
399
- sumxx += @weight[i] * @x[i] ** 2
400
- sumyy += @weight[i] * @y[i] ** 2
401
- sumxy += @weight[i] * @x[i] * @y[i]
402
- end
403
- else
404
- 0.upto(@numxy-1) do |i|
405
- sumx += @x[i]
406
- sumy += @y[i]
407
- sumxx += @x[i] ** 2
408
- sumyy += @y[i] ** 2
409
- sumxy += @x[i] * @y[i]
410
- end
411
- end
412
- # Multiply each return value by 1.0 to force them to Floats
413
- return sumx * 1.0, sumy * 1.0, sumxx * 1.0, sumyy * 1.0, sumxy * 1.0
414
- end
415
-
416
- ############################################################################
417
- # Normalize and initialize line fit weighting factors
418
- #
419
- def setWeights(weights = nil)
420
- return TRUE unless weights
421
- if weights.length != @numxy
422
- puts "Length of weight array must equal length of data array!" unless @hush
423
- return FALSE
424
- end
425
- if @validate
426
- validWeights(weights) or return FALSE
427
- end
428
- sumw = numNonZero = 0
429
- weights.each do |weight|
430
- if weight < 0
431
- puts "Weights must be non-negative numbers!" unless @hush
432
- return FALSE
433
- end
434
- sumw += weight
435
- numNonZero += 1 if weight != 0
436
- end
437
- if numNonZero < 2
438
- puts "At least two weights must be nonzero!" unless @hush
439
- return FALSE
440
- end
441
- factor = weights.length.to_f / sumw
442
- weights.collect! {|weight| weight * factor}
443
- @weight = weights
444
- return TRUE
445
- end
446
-
447
- ############################################################################
448
- # Return the sum of the squared errors
449
- #
450
- def sumSqErrors
451
- unless @sumSqErrors
452
- self.regress or return
453
- @sumSqErrors = @sumSqDevy - @sumSqDevx * @slope ** 2
454
- @sumSqErrors = 0 if @sumSqErrors < 0
455
- end
456
- return @sumSqErrors
457
- end
458
-
459
- ############################################################################
460
- # Verify that the input x-y data are numeric
461
- #
462
- def validData
463
- 0.upto(@numxy-1) do |i|
464
- unless @x[i]
465
- puts "Input x[#{i}] is not defined" unless @hush
466
- return FALSE
467
- end
468
- if @x[i] !~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/
469
- puts "Input x[#{i}] is not a number: #{x[i]}" unless @hush
470
- return FALSE
471
- end
472
- unless @y[i]
473
- puts "Input y[#{i}] is not defined" unless @hush
474
- return FALSE
475
- end
476
- if @y[i] !~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/
477
- puts "Input y[#{i}] is not a number: #{y[i]}" unless @hush
478
- return FALSE
479
- end
480
- end
481
- return TRUE
482
- end
483
-
484
- ############################################################################
485
- # Verify that the input weights are numeric
486
- #
487
- def validWeights(weights)
488
- 0.upto(weights.length) do |i|
489
- unless weights[i]
490
- puts "Input weights[#{i}] is not defined" unless @hush
491
- return FALSE
492
- end
493
- if weights[i] !~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/
494
- puts "Input weights[#{i}] is not a number: #{weights[i]}" unless @hush
495
- return FALSE
496
- end
497
- end
498
- return TRUE
499
- end
500
-
501
- end
1
+ # == Synopsis
2
+ #
3
+ # Weighted or unweighted least-squares line fitting to two-dimensional data (y = a + b * x).
4
+ # (This is also called linear regression.)
5
+ #
6
+ # == Usage
7
+ #
8
+ # x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18]
9
+ # y = [4039,4057,4052,4094,4104,4110,4154,4161,4186,4195,4229,4244,4242,4283,4322,4333,4368,4389]
10
+ #
11
+ # linefit = LineFit.new
12
+ # linefit.setData(x,y)
13
+ #
14
+ # intercept, slope = linefit.coefficients
15
+ # rSquared = linefit.rSquared
16
+ # meanSquaredError = linefit.meanSqError
17
+ # durbinWatson = linefit.durbinWatson
18
+ # sigma = linefit.sigma
19
+ # tStatIntercept, tStatSlope = linefit.tStatistics
20
+ # predictedYs = linefit.predictedYs
21
+ # residuals = linefit.residuals
22
+ # varianceIntercept, varianceSlope = linefit.varianceOfEstimates
23
+ #
24
+ # newX = 24
25
+ # newY = linefit.forecast(newX)
26
+ #
27
+ # == Authors
28
+ # Eric Cline, escline(at)gmail(dot)com, ( Ruby Port, LineFit#forecast )
29
+ #
30
+ #
31
+ # Richard Anderson ( Statistics::LineFit Perl module )
32
+ # http://search.cpan.org/~randerson/Statistics-LineFit-0.07
33
+ #
34
+ # == See Also
35
+ # Mendenhall, W., and Sincich, T.L., 2003, A Second Course in Statistics:
36
+ # Regression Analysis, 6th ed., Prentice Hall.
37
+ # Press, W. H., Flannery, B. P., Teukolsky, S. A., Vetterling, W. T., 1992,
38
+ # Numerical Recipes in C : The Art of Scientific Computing, 2nd ed.,
39
+ # Cambridge University Press.
40
+ #
41
+ # == License
42
+ # Licensed under the same terms as Ruby.
43
+ #
44
+
45
+ class LineFit
46
+
47
+ ############################################################################
48
+ # Create a LineFit object with the optional validate and hush parameters
49
+ #
50
+ # linefit = LineFit.new
51
+ # linefit = LineFit.new(validate)
52
+ # linefit = LineFit.new(validate, hush)
53
+ #
54
+ # validate = 1 -> Verify input data is numeric (slower execution)
55
+ # = 0 -> Don't verify input data (default, faster execution)
56
+ # hush = 1 -> Suppress error messages
57
+ # = 0 -> Enable error messages (default)
58
+
59
+ def initialize(validate = FALSE, hush = FALSE)
60
+ @doneRegress = FALSE
61
+ @gotData = FALSE
62
+ @hush = hush
63
+ @validate = validate
64
+ end
65
+
66
+ ############################################################################
67
+ # Return the slope and intercept from least squares line fit
68
+ #
69
+ # intercept, slope = linefit.coefficients
70
+ #
71
+ # The returned list is undefined if the regression fails.
72
+ #
73
+ def coefficients
74
+ self.regress unless (@intercept and @slope)
75
+ return @intercept, @slope
76
+ end
77
+
78
+ ############################################################################
79
+ # Return the Durbin-Watson statistic
80
+ #
81
+ # durbinWatson = linefit.durbinWatson
82
+ #
83
+ # The Durbin-Watson test is a test for first-order autocorrelation in the
84
+ # residuals of a time series regression. The Durbin-Watson statistic has a
85
+ # range of 0 to 4; a value of 2 indicates there is no autocorrelation.
86
+ #
87
+ # The return value is undefined if the regression fails. If weights are
88
+ # input, the return value is the weighted Durbin-Watson statistic.
89
+ #
90
+ def durbinWatson
91
+ unless @durbinWatson
92
+ self.regress or return
93
+ sumErrDiff = 0
94
+ errorTMinus1 = @y[0] - (@intercept + @slope * @x[0])
95
+ 1.upto(@numxy-1) do |i|
96
+ error = @y[i] - (@intercept + @slope * @x[i])
97
+ sumErrDiff += (error - errorTMinus1) ** 2
98
+ errorTMinus1 = error
99
+ end
100
+ @durbinWatson = sumSqErrors() > 0 ? sumErrDiff / sumSqErrors() : 0
101
+ end
102
+ return @durbinWatson
103
+ end
104
+
105
+ ############################################################################
106
+ # Return the mean squared error
107
+ #
108
+ # meanSquaredError = linefit.meanSqError
109
+ #
110
+ # The return value is undefined if the regression fails. If weights are
111
+ # input, the return value is the weighted mean squared error.
112
+ #
113
+ def meanSqError
114
+ unless @meanSqError
115
+ self.regress or return
116
+ @meanSqError = sumSqErrors() / @numxy
117
+ end
118
+ return @meanSqError
119
+ end
120
+
121
+ ############################################################################
122
+ # Return the predicted Y values
123
+ #
124
+ # predictedYs = linefit.predictedYs
125
+ #
126
+ # The returned list is undefined if the regression fails.
127
+ #
128
+ def predictedYs
129
+ unless @predictedYs
130
+ self.regress or return
131
+ @predictedYs = []
132
+ 0.upto(@numxy-1) do |i|
133
+ @predictedYs[i] = @intercept + @slope * @x[i]
134
+ end
135
+ end
136
+ return @predictedYs
137
+ end
138
+
139
+ ############################################################################
140
+ # Return the independent (Y) value, by using a dependent (X) value
141
+ #
142
+ # forecasted_y = linefit.forecast(x_value)
143
+ #
144
+ # Will use the slope and intercept to calculate the Y value along the line
145
+ # at the x value. Note: value returned only as good as the line fit.
146
+ #
147
+ def forecast(x)
148
+ self.regress unless (@intercept and @slope)
149
+ return @slope * x + @intercept
150
+ end
151
+
152
+ ############################################################################
153
+ # Do the least squares line fit (if not already done)
154
+ #
155
+ # linefit.regress
156
+ #
157
+ # You don't need to call this method because it is invoked by the other
158
+ # methods as needed. After you call setData(), you can call regress() at
159
+ # any time to get the status of the regression for the current data.
160
+ #
161
+ def regress
162
+ return @regressOK if @doneRegress
163
+ unless @gotData
164
+ puts "No valid data input - can't do regression" unless @hush
165
+ return FALSE
166
+ end
167
+ sumx, sumy, @sumxx, sumyy, sumxy = computeSums()
168
+ @sumSqDevx = @sumxx - sumx ** 2 / @numxy
169
+ if @sumSqDevx != 0
170
+ @sumSqDevy = sumyy - sumy ** 2 / @numxy
171
+ @sumSqDevxy = sumxy - sumx * sumy / @numxy
172
+ @slope = @sumSqDevxy / @sumSqDevx
173
+ @intercept = (sumy - @slope * sumx) / @numxy
174
+ @regressOK = TRUE
175
+ else
176
+ puts "Can't fit line when x values are all equal" unless @hush
177
+ @sumxx = @sumSqDevx = nil
178
+ @regressOK = FALSE
179
+ end
180
+ @doneRegress = TRUE
181
+ return @regressOK
182
+ end
183
+
184
+ ############################################################################
185
+ # Return the predicted Y values minus the observed Y values
186
+ #
187
+ # residuals = linefit.residuals
188
+ #
189
+ # The returned list is undefined if the regression fails.
190
+ #
191
+ def residuals
192
+ unless @residuals
193
+ self.regress or return
194
+ @residuals = []
195
+ 0.upto(@numxy-1) do |i|
196
+ @residuals[i] = @y[i] - (@intercept + @slope * @x[i])
197
+ end
198
+ end
199
+ return @residuals
200
+ end
201
+
202
+ ############################################################################
203
+ # Return the correlation coefficient
204
+ #
205
+ # rSquared = linefit.rSquared
206
+ #
207
+ # R squared, also called the square of the Pearson product-moment
208
+ # correlation coefficient, is a measure of goodness-of-fit. It is the
209
+ # fraction of the variation in Y that can be attributed to the variation
210
+ # in X. A perfect fit will have an R squared of 1; fitting a line to the
211
+ # vertices of a regular polygon will yield an R squared of zero. Graphical
212
+ # displays of data with an R squared of less than about 0.1 do not show a
213
+ # visible linear trend.
214
+ #
215
+ # The return value is undefined if the regression fails. If weights are
216
+ # input, the return value is the weighted correlation coefficient.
217
+ #
218
+ def rSquared
219
+ unless @rSquared
220
+ self.regress or return
221
+ denom = @sumSqDevx * @sumSqDevy
222
+ @rSquared = denom != 0 ? @sumSqDevxy ** 2 / denom : 1
223
+ end
224
+ return @rSquared
225
+ end
226
+
227
+ ############################################################################
228
+ # Initialize (x,y) values and optional weights
229
+ #
230
+ # lineFit.setData(x, y)
231
+ # lineFit.setData(x, y, weights)
232
+ # lineFit.setData(xy)
233
+ # lineFit.setData(xy, weights)
234
+ #
235
+ # xy is an array of arrays; x values are xy[i][0], y values are
236
+ # xy[i][1]. The method identifies the difference between the first
237
+ # and fourth calling signatures by examining the first argument.
238
+ #
239
+ # The optional weights array must be the same length as the data array(s).
240
+ # The weights must be non-negative numbers; at least two of the weights
241
+ # must be nonzero. Only the relative size of the weights is significant:
242
+ # the program normalizes the weights (after copying the input values) so
243
+ # that the sum of the weights equals the number of points. If you want to
244
+ # do multiple line fits using the same weights, the weights must be passed
245
+ # to each call to setData().
246
+ #
247
+ # The method will return false if the array lengths don't match, there are
248
+ # less than two data points, any weights are negative or less than two of
249
+ # the weights are nonzero. If the new() method was called with validate =
250
+ # 1, the method will also verify that the data and weights are valid
251
+ # numbers. Once you successfully call setData(), the next call to any
252
+ # method other than new() or setData() invokes the regression.
253
+ #
254
+ def setData(x, y = nil, weights = nil)
255
+ @doneRegress = FALSE
256
+ @x = @y = @numxy = @weight = \
257
+ @intercept = @slope = @rSquared = \
258
+ @sigma = @durbinWatson = @meanSqError = \
259
+ @sumSqErrors = @tStatInt = @tStatSlope = \
260
+ @predictedYs = @residuals = @sumxx = \
261
+ @sumSqDevx = @sumSqDevy = @sumSqDevxy = nil
262
+ if x.length < 2
263
+ puts "Must input more than one data point!" unless @hush
264
+ return FALSE
265
+ end
266
+ if x[0].class == Array
267
+ @numxy = x.length
268
+ setWeights(y) or return FALSE
269
+ @x = []
270
+ @y = []
271
+ x.each do |xy|
272
+ @x << xy[0]
273
+ @y << xy[1]
274
+ end
275
+ else
276
+ if x.length != y.length
277
+ puts "Length of x and y arrays must be equal!" unless @hush
278
+ return FALSE
279
+ end
280
+ @numxy = x.length
281
+ setWeights(weights) or return FALSE
282
+ @x = x
283
+ @y = y
284
+ end
285
+ if @validate
286
+ unless validData()
287
+ @x = @y = @weights = @numxy = nil
288
+ return FALSE
289
+ end
290
+ end
291
+ @gotData = TRUE
292
+ return TRUE
293
+ end
294
+
295
+ ############################################################################
296
+ # Return the estimated homoscedastic standard deviation of the
297
+ # error term
298
+ #
299
+ # sigma = linefit.sigma
300
+ #
301
+ # Sigma is an estimate of the homoscedastic standard deviation of the
302
+ # error. Sigma is also known as the standard error of the estimate.
303
+ #
304
+ # The return value is undefined if the regression fails. If weights are
305
+ # input, the return value is the weighted standard error.
306
+ #
307
+ def sigma
308
+ unless @sigma
309
+ self.regress or return
310
+ @sigma = @numxy > 2 ? Math.sqrt(sumSqErrors() / (@numxy - 2)) : 0
311
+ end
312
+ return @sigma
313
+ end
314
+
315
+ ############################################################################
316
+ # Return the T statistics
317
+ #
318
+ # tStatIntercept, tStatSlope = linefit.tStatistics
319
+ #
320
+ # The t statistic, also called the t ratio or Wald statistic, is used to
321
+ # accept or reject a hypothesis using a table of cutoff values computed
322
+ # from the t distribution. The t-statistic suggests that the estimated
323
+ # value is (reasonable, too small, too large) when the t-statistic is
324
+ # (close to zero, large and positive, large and negative).
325
+ #
326
+ # The returned list is undefined if the regression fails. If weights are
327
+ # input, the returned values are the weighted t statistics.
328
+ #
329
+ def tStatistics
330
+ unless (@tStatInt and @tStatSlope)
331
+ self.regress or return
332
+ biasEstimateInt = sigma() * Math.sqrt(@sumxx / (@sumSqDevx * @numxy))
333
+ @tStatInt = biasEstimateInt != 0 ? @intercept / biasEstimateInt : 0
334
+ biasEstimateSlope = sigma() / Math.sqrt(@sumSqDevx)
335
+ @tStatSlope = biasEstimateSlope != 0 ? @slope / biasEstimateSlope : 0
336
+ end
337
+ return @tStatInt, @tStatSlope
338
+ end
339
+
340
+ ############################################################################
341
+ # Return the variances in the estimates of the intercept and slope
342
+ #
343
+ # varianceIntercept, varianceSlope = linefit.varianceOfEstimates
344
+ #
345
+ # Assuming the data are noisy or inaccurate, the intercept and slope
346
+ # returned by the coefficients() method are only estimates of the true
347
+ # intercept and slope. The varianceOfEstimates() method returns the
348
+ # variances of the estimates of the intercept and slope, respectively. See
349
+ # Numerical Recipes in C, section 15.2 (Fitting Data to a Straight Line),
350
+ # equation 15.2.9.
351
+ #
352
+ # The returned list is undefined if the regression fails. If weights are
353
+ # input, the returned values are the weighted variances.
354
+ #
355
+ def varianceOfEstimates
356
+ unless @intercept and @slope
357
+ self.regress or return
358
+ end
359
+ predictedYs = predictedYs()
360
+ s = sx = sxx = 0
361
+ if @weight
362
+ 0.upto(@numxy-1) do |i|
363
+ variance = (predictedYs[i] - @y[i]) ** 2
364
+ unless variance == 0
365
+ s += 1.0 / variance
366
+ sx += @weight[i] * @x[i] / variance
367
+ sxx += @weight[i] * @x[i] ** 2 / variance
368
+ end
369
+ end
370
+ else
371
+ 0.upto(@numxy-1) do |i|
372
+ variance = (predictedYs[i] - @y[i]) ** 2
373
+ unless variance == 0
374
+ s += 1.0 / variance
375
+ sx += @x[i] / variance
376
+ sxx += @x[i] ** 2 / variance
377
+ end
378
+ end
379
+ end
380
+ denominator = (s * sxx - sx ** 2)
381
+ if denominator == 0
382
+ return
383
+ else
384
+ return sxx / denominator, s / denominator
385
+ end
386
+ end
387
+
388
+ private
389
+
390
+ ############################################################################
391
+ # Compute sum of x, y, x**2, y**2, and x*y
392
+ #
393
+ def computeSums
394
+ sumx = sumy = sumxx = sumyy = sumxy = 0
395
+ if @weight
396
+ 0.upto(@numxy-1) do |i|
397
+ sumx += @weight[i] * @x[i]
398
+ sumy += @weight[i] * @y[i]
399
+ sumxx += @weight[i] * @x[i] ** 2
400
+ sumyy += @weight[i] * @y[i] ** 2
401
+ sumxy += @weight[i] * @x[i] * @y[i]
402
+ end
403
+ else
404
+ 0.upto(@numxy-1) do |i|
405
+ sumx += @x[i]
406
+ sumy += @y[i]
407
+ sumxx += @x[i] ** 2
408
+ sumyy += @y[i] ** 2
409
+ sumxy += @x[i] * @y[i]
410
+ end
411
+ end
412
+ # Multiply each return value by 1.0 to force them to Floats
413
+ return sumx * 1.0, sumy * 1.0, sumxx * 1.0, sumyy * 1.0, sumxy * 1.0
414
+ end
415
+
416
+ ############################################################################
417
+ # Normalize and initialize line fit weighting factors
418
+ #
419
+ def setWeights(weights = nil)
420
+ return TRUE unless weights
421
+ if weights.length != @numxy
422
+ puts "Length of weight array must equal length of data array!" unless @hush
423
+ return FALSE
424
+ end
425
+ if @validate
426
+ validWeights(weights) or return FALSE
427
+ end
428
+ sumw = numNonZero = 0
429
+ weights.each do |weight|
430
+ if weight < 0
431
+ puts "Weights must be non-negative numbers!" unless @hush
432
+ return FALSE
433
+ end
434
+ sumw += weight
435
+ numNonZero += 1 if weight != 0
436
+ end
437
+ if numNonZero < 2
438
+ puts "At least two weights must be nonzero!" unless @hush
439
+ return FALSE
440
+ end
441
+ factor = weights.length.to_f / sumw
442
+ weights.collect! {|weight| weight * factor}
443
+ @weight = weights
444
+ return TRUE
445
+ end
446
+
447
+ ############################################################################
448
+ # Return the sum of the squared errors
449
+ #
450
+ def sumSqErrors
451
+ unless @sumSqErrors
452
+ self.regress or return
453
+ @sumSqErrors = @sumSqDevy - @sumSqDevx * @slope ** 2
454
+ @sumSqErrors = 0 if @sumSqErrors < 0
455
+ end
456
+ return @sumSqErrors
457
+ end
458
+
459
+ ############################################################################
460
+ # Verify that the input x-y data are numeric
461
+ #
462
+ def validData
463
+ 0.upto(@numxy-1) do |i|
464
+ unless @x[i]
465
+ puts "Input x[#{i}] is not defined" unless @hush
466
+ return FALSE
467
+ end
468
+ if @x[i] !~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/
469
+ puts "Input x[#{i}] is not a number: #{x[i]}" unless @hush
470
+ return FALSE
471
+ end
472
+ unless @y[i]
473
+ puts "Input y[#{i}] is not defined" unless @hush
474
+ return FALSE
475
+ end
476
+ if @y[i] !~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/
477
+ puts "Input y[#{i}] is not a number: #{y[i]}" unless @hush
478
+ return FALSE
479
+ end
480
+ end
481
+ return TRUE
482
+ end
483
+
484
+ ############################################################################
485
+ # Verify that the input weights are numeric
486
+ #
487
+ def validWeights(weights)
488
+ 0.upto(weights.length) do |i|
489
+ unless weights[i]
490
+ puts "Input weights[#{i}] is not defined" unless @hush
491
+ return FALSE
492
+ end
493
+ if weights[i] !~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/
494
+ puts "Input weights[#{i}] is not a number: #{weights[i]}" unless @hush
495
+ return FALSE
496
+ end
497
+ end
498
+ return TRUE
499
+ end
500
+
501
+ end
metadata CHANGED
@@ -1,7 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linefit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.3
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - Eric Cline
@@ -9,7 +10,7 @@ authors:
9
10
  autorequire:
10
11
  bindir: bin
11
12
  cert_chain: []
12
- date: 2014-07-09 00:00:00.000000000 Z
13
+ date: 2016-10-05 00:00:00.000000000 Z
13
14
  dependencies: []
14
15
  description: LineFit does weighted or unweighted least-squares line fitting to two-dimensional
15
16
  data (y = a + b * x). (Linear Regression)
@@ -25,26 +26,26 @@ files:
25
26
  - CHANGELOG
26
27
  homepage: http://rubygems.org/gems/linefit
27
28
  licenses: []
28
- metadata: {}
29
29
  post_install_message:
30
30
  rdoc_options: []
31
31
  require_paths:
32
32
  - lib
33
33
  required_ruby_version: !ruby/object:Gem::Requirement
34
+ none: false
34
35
  requirements:
35
- - - '>='
36
+ - - ! '>='
36
37
  - !ruby/object:Gem::Version
37
38
  version: '0'
38
39
  required_rubygems_version: !ruby/object:Gem::Requirement
40
+ none: false
39
41
  requirements:
40
- - - '>='
42
+ - - ! '>='
41
43
  - !ruby/object:Gem::Version
42
44
  version: '0'
43
45
  requirements: []
44
46
  rubyforge_project:
45
- rubygems_version: 2.0.3
47
+ rubygems_version: 1.8.23
46
48
  signing_key:
47
- specification_version: 4
49
+ specification_version: 3
48
50
  summary: LineFit is a linear regression math class.
49
51
  test_files: []
50
- has_rdoc: true
checksums.yaml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- SHA1:
3
- metadata.gz: 16ab681f54fccf1c7a1cc4f400beb14b8ea7ace2
4
- data.tar.gz: ba476f7ac58ec9de0932614dfe51b12632f1a845
5
- SHA512:
6
- metadata.gz: 2860ec08558453a6a7a4f0cac46a15df1deab73737b844db28b18b795955fc4ac9e6750c947c7a248487de11b0827ecbf7df98d8ce8c65f72d9b5cf209a85bc3
7
- data.tar.gz: 5bf28fd58e3c72f3733053006116ca695078b891b56ab47cf47417afd5dde7b635470b4ae5f1367dd9ba7d4bf0ba5ba9f83cbf589bf02a7f21136c7cbafade79