ruby-regress 0.0.1 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.mdown +21 -2
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/bin/regress +39 -23
- data/lib/ruby-regress.rb +14 -9
- metadata +10 -15
- data/.document +0 -5
- data/.gitignore +0 -21
data/README.mdown
CHANGED
@@ -4,10 +4,29 @@
|
|
4
4
|
`ruby-regress` is a tool for computing correlations and regression equations
|
5
5
|
from two-variable input. It is designed to function as a drop-in replacement
|
6
6
|
for Gary Perlman's `regress`, at least for those who use only the basic
|
7
|
-
functionality that `regress` provides
|
7
|
+
functionality that `regress` provides.
|
8
8
|
|
9
|
+
## Why!?
|
10
|
+
|
11
|
+
The problem with Gary Perlman's excellent |STAT programs is twofold:
|
12
|
+
|
13
|
+
+ The only way of obtaining a copy is by emailing Perlman and asking for it.
|
14
|
+
+ The distribution of modified copies is [expressly forbidden](http://oldwww.acm.org/perlman/stat/history.html#conditions).
|
15
|
+
|
16
|
+
If you need bulletproof robustness you're probably better off dealing with Perlman's terms of access and using |STAT; if you want ease of installation, try `ruby-regress`.
|
9
17
|
## Installation
|
10
18
|
|
19
|
+
#### From rubygems
|
20
|
+
|
21
|
+
If you're using Gemcutter.org as your gem host (hint: you almost certainly are) you can
|
22
|
+
install ruby-regress using rubygems:
|
23
|
+
|
24
|
+
gem install ruby-regress
|
25
|
+
|
26
|
+
which installs the `regress` executable.
|
27
|
+
|
28
|
+
#### From source
|
29
|
+
|
11
30
|
Download the most recent source from Github:
|
12
31
|
|
13
32
|
git clone git://github.com/doches/ruby-regress.git
|
@@ -35,4 +54,4 @@ we can get the correlation coefficient between these two variables by:
|
|
35
54
|
cat data.txt | regress
|
36
55
|
|
37
56
|
which will dump a load of statistical information about the datasets to
|
38
|
-
the terminal.
|
57
|
+
the terminal.
|
data/Rakefile
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.1.1
|
data/bin/regress
CHANGED
@@ -19,28 +19,44 @@ require 'ruby-regress'
|
|
19
19
|
|
20
20
|
input = []
|
21
21
|
STDIN.each_line do |line|
|
22
|
-
|
23
|
-
input.push cols if cols.size
|
22
|
+
cols = line.strip.gsub(/#.+$/,'').split(/\s/).map { |x| x.to_f }
|
23
|
+
input.push cols if cols.size > 0
|
24
24
|
end
|
25
|
-
|
25
|
+
if input[0].size == 1 # Single column, stats only
|
26
|
+
a = input.flatten
|
27
|
+
puts "Analysis for #{a.size} cases of 1 variable:"
|
28
|
+
puts "Variable A"
|
29
|
+
puts "Min#{sprintf("%18.4f",Regress.min(a))}"
|
30
|
+
puts "Max#{sprintf("%18.4f",Regress.max(a))}"
|
31
|
+
puts "Sum#{sprintf("%18.4f",Regress.sum(a))}"
|
32
|
+
puts "Mean#{sprintf("%17.4f",Regress.mean(a))}"
|
33
|
+
puts "SD#{sprintf("%19.4f",Regress.standard_deviation(a))}"
|
34
|
+
elsif input[0].size == 2 # Two column, stats + regression
|
35
|
+
a,b = *[0,1].map { |i| input.collect { |x| x[i] } }
|
26
36
|
|
27
|
-
regress = Regress.new(a,b)
|
28
|
-
puts "Analysis for #{a.size} cases of 2 variables:"
|
29
|
-
puts "Variable REG A "
|
30
|
-
puts "Min#{sprintf("%18.4f",Regress.min(a))}#{sprintf("%11.4f ",Regress.min(b))}"
|
31
|
-
puts "Max#{sprintf("%18.4f",Regress.max(a))}#{sprintf("%11.4f ",Regress.max(b))}"
|
32
|
-
puts "Sum#{sprintf("%18.4f",Regress.sum(a))}#{sprintf("%11.4f ",Regress.sum(b))}"
|
33
|
-
puts "Mean#{sprintf("%17.4f",Regress.mean(a))}#{sprintf("%11.4f ",Regress.mean(b))}"
|
34
|
-
puts "SD#{sprintf("%19.4f",Regress.standard_deviation(a))}#{sprintf("%11.4f ",Regress.standard_deviation(b))}"
|
35
|
-
puts ""
|
36
|
-
puts "Correlation Matrix:"
|
37
|
-
puts "REG 1.0000 "
|
38
|
-
puts "A #{sprintf("%1.4f",regress.r)} 1.0000 "
|
39
|
-
puts "Variable REG A"
|
40
|
-
puts ""
|
41
|
-
#puts "Regression Equation for REG:"
|
42
|
-
#puts "REG =#{sprintf("%3.3f",Regress.slope)} A +#{sprintf("%4.4f",Regress.intercept)}"
|
43
|
-
#puts ""
|
44
|
-
puts "Significance test for prediction of REG"
|
45
|
-
puts " Mult-R R-Squared "
|
46
|
-
puts " #{sprintf("%9.4f",regress.r)} #{sprintf("%9.4f",regress.r**2)} "
|
37
|
+
regress = Regress.new(a,b)
|
38
|
+
puts "Analysis for #{a.size} cases of 2 variables:"
|
39
|
+
puts "Variable REG A "
|
40
|
+
puts "Min#{sprintf("%18.4f",Regress.min(a))}#{sprintf("%11.4f ",Regress.min(b))}"
|
41
|
+
puts "Max#{sprintf("%18.4f",Regress.max(a))}#{sprintf("%11.4f ",Regress.max(b))}"
|
42
|
+
puts "Sum#{sprintf("%18.4f",Regress.sum(a))}#{sprintf("%11.4f ",Regress.sum(b))}"
|
43
|
+
puts "Mean#{sprintf("%17.4f",Regress.mean(a))}#{sprintf("%11.4f ",Regress.mean(b))}"
|
44
|
+
puts "SD#{sprintf("%19.4f",Regress.standard_deviation(a))}#{sprintf("%11.4f ",Regress.standard_deviation(b))}"
|
45
|
+
puts ""
|
46
|
+
puts "Correlation Matrix:"
|
47
|
+
puts "REG 1.0000 "
|
48
|
+
puts "A #{sprintf("%1.4f",regress.r)} 1.0000 "
|
49
|
+
puts "Variable REG A"
|
50
|
+
puts ""
|
51
|
+
#puts "Regression Equation for REG:"
|
52
|
+
#puts "REG =#{sprintf("%3.3f",Regress.slope)} A +#{sprintf("%4.4f",Regress.intercept)}"
|
53
|
+
#puts ""
|
54
|
+
puts "Significance test for prediction of REG"
|
55
|
+
puts " Mult-R R-Squared "
|
56
|
+
puts " #{sprintf("%9.4f",regress.r)} #{sprintf("%9.4f",regress.r**2)} "
|
57
|
+
else
|
58
|
+
STDERR.puts "> 2 column input not supported."
|
59
|
+
STDERR.puts ""
|
60
|
+
STDERR.puts "Sorry."
|
61
|
+
exit(1)
|
62
|
+
end
|
data/lib/ruby-regress.rb
CHANGED
@@ -2,15 +2,20 @@ class Regress
|
|
2
2
|
attr_reader :r, :slope, :intercept
|
3
3
|
|
4
4
|
# Create a Regress object from two vectors +a+ and +b+. Note that +a+ and +b+ must be of the same length.
|
5
|
-
def initialize(a,b)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
5
|
+
def initialize(a,b=nil)
|
6
|
+
if b.nil?
|
7
|
+
# Don't do regression if we're only given one item
|
8
|
+
else
|
9
|
+
raise "Regress#initialize expects two vectors of equal length (given vectors of lengths #{a.size}, #{b.size})." if a.size != b.size
|
10
|
+
|
11
|
+
sa,sb = *[a,b].map { |d| Regress.sum(d) }
|
12
|
+
sa2,sb2 = *[a,b].map { |d| Regress.sum(Regress.square(d)) }
|
13
|
+
sab = Regress.multiply(a,b)
|
14
|
+
n = a.size
|
15
|
+
|
16
|
+
@r = (n * sab - sa * sb) / (( (n * sa2 - sa**2) * (n * sb2 - sb**2) ) ** 0.5)
|
17
|
+
@r = 0.0 if @r.nan?
|
18
|
+
end
|
14
19
|
end
|
15
20
|
|
16
21
|
def Regress.sum(vector)
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-regress
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 25
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 0
|
9
8
|
- 1
|
10
|
-
|
9
|
+
- 1
|
10
|
+
version: 0.1.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Trevor Fountain
|
@@ -15,8 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
19
|
-
default_executable: regress
|
18
|
+
date: 2012-04-17 00:00:00 Z
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
22
21
|
name: wrong
|
@@ -44,8 +43,6 @@ extra_rdoc_files:
|
|
44
43
|
- LICENSE
|
45
44
|
- README.mdown
|
46
45
|
files:
|
47
|
-
- .document
|
48
|
-
- .gitignore
|
49
46
|
- LICENSE
|
50
47
|
- README.mdown
|
51
48
|
- Rakefile
|
@@ -54,13 +51,12 @@ files:
|
|
54
51
|
- lib/ruby-regress.rb
|
55
52
|
- test/helper.rb
|
56
53
|
- test/test_ruby-regress.rb
|
57
|
-
has_rdoc: true
|
58
54
|
homepage: http://github.com/doches/ruby-regress
|
59
55
|
licenses: []
|
60
56
|
|
61
57
|
post_install_message:
|
62
|
-
rdoc_options:
|
63
|
-
|
58
|
+
rdoc_options: []
|
59
|
+
|
64
60
|
require_paths:
|
65
61
|
- lib
|
66
62
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -84,10 +80,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
84
80
|
requirements: []
|
85
81
|
|
86
82
|
rubyforge_project:
|
87
|
-
rubygems_version: 1.
|
83
|
+
rubygems_version: 1.8.15
|
88
84
|
signing_key:
|
89
85
|
specification_version: 3
|
90
86
|
summary: CLI tool for computing correlation (Pearson's r)
|
91
|
-
test_files:
|
92
|
-
|
93
|
-
- test/test_ruby-regress.rb
|
87
|
+
test_files: []
|
88
|
+
|
data/.document
DELETED