ruby-regress 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.mdown +38 -0
- data/Rakefile +55 -0
- data/VERSION +1 -0
- data/bin/regress +46 -0
- data/lib/ruby-regress.rb +44 -0
- data/test/helper.rb +9 -0
- data/test/test_ruby-regress.rb +64 -0
- metadata +93 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Trevor Fountain
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.mdown
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# ruby-regress
|
2
|
+
### A partial drop-in replacement for |STAT's regress
|
3
|
+
|
4
|
+
`ruby-regress` is a tool for computing correlations and regression equations
|
5
|
+
from two-variable input. It is designed to function as a drop-in replacement
|
6
|
+
for Gary Perlman's `regress`, at least for those who use only the basic
|
7
|
+
functionality that `regress` provides...
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Download the most recent source from GitHub:
|
12
|
+
|
13
|
+
git clone git://github.com/doches/ruby-regress.git
|
14
|
+
|
15
|
+
then build and install the gem:
|
16
|
+
|
17
|
+
cd ruby-regress
|
18
|
+
rake build
|
19
|
+
sudo rake install
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
ruby-regress installs a single command line tool called `regress`, which
|
24
|
+
reads from `STDIN` and prints a report containing the correlation coefficient,
|
25
|
+
plus some descriptive statistics, to `STDOUT`. For example, if we have a file in
|
26
|
+
the current directory called `data.txt` containing two datasets:
|
27
|
+
|
28
|
+
1 12.0
|
29
|
+
2 11.0
|
30
|
+
3 13.0
|
31
|
+
4 14.0
|
32
|
+
|
33
|
+
we can get the correlation coefficient between these two variables by:
|
34
|
+
|
35
|
+
cat data.txt | regress
|
36
|
+
|
37
|
+
which will dump a load of statistical information about the datasets to
|
38
|
+
the terminal.
|
data/Rakefile
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
# Gem packaging tasks are provided by Jeweler. Jeweler is optional: if it
# cannot be loaded a hint is printed and the remaining tasks still load.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "ruby-regress"
    gem.summary = %Q{CLI tool for computing correlation (Pearson's r)}
    gem.description = %Q{Ruby implementation of Gary Perlman's `regress` tool from (the difficult to obtain) |STAT package.}
    gem.email = "doches@gmail.com"
    gem.homepage = "http://github.com/doches/ruby-regress"
    gem.authors = ["Trevor Fountain"]
    gem.bindir = "bin"
    gem.executables = %w{regress}
    gem.add_development_dependency('wrong','>=0.2.0')
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# `rake test` runs every test/**/test_*.rb with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# `rake rcov` runs the same tests under RCov when it is installed; otherwise
# the task is still defined but aborts with installation instructions.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this file (presumably it is
# supplied by Jeweler -- TODO confirm). Only depend on it when it exists, so
# that `rake test` keeps working when nothing has defined it.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# The RDoc task lives in 'rdoc/task' on current toolchains; 'rake/rdoctask'
# is only tried as a fallback for old Rake installations.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not leak into the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "ruby-regress #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
data/bin/regress
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Reads two whitespace-delimited columns of numerical data from STDIN
# and outputs Pearson's r and some assorted descriptive statistics.
#
# A partial drop-in replacement for |STAT's `regress` tool, with caveats:
#
# + Does not compute SEest, F(), or prob()
# + Only handles two variables (no more!)
# + Does not calculate regression line, because I don't need it.
#
# ...and extra features:
#
# + Ignores comment lines (lines starting with '#')
# + Ignores anything on a line /after/ a '#'
#
# Lines that do not contain exactly two fields are skipped. Fields are
# converted with String#to_f, so non-numeric text is read as 0.0.

$LOAD_PATH.unshift File.join(File.dirname(__FILE__),"..","lib")
require 'ruby-regress'

input = []
STDIN.each_line do |line|
  # Drop everything from the first '#' onwards (a bare trailing '#' included),
  # then split on runs of whitespace so that tabs or repeated spaces between
  # the two columns do not create empty fields and get the row discarded.
  cols = line.sub(/#.*/,'').split.map { |x| x.to_f }
  input.push cols if cols.size == 2
end

# Without any usable row the statistics below are undefined (min/max are nil
# and cannot be formatted), so stop with an explicit message instead.
abort "regress: no rows with exactly two columns were read from STDIN" if input.empty?

# Turn the list of [first, second] rows into the two column vectors.
a,b = input.transpose

regress = Regress.new(a,b)
puts "Analysis for #{a.size} cases of 2 variables:"
puts "Variable REG A "
puts "Min#{sprintf("%18.4f",Regress.min(a))}#{sprintf("%11.4f ",Regress.min(b))}"
puts "Max#{sprintf("%18.4f",Regress.max(a))}#{sprintf("%11.4f ",Regress.max(b))}"
puts "Sum#{sprintf("%18.4f",Regress.sum(a))}#{sprintf("%11.4f ",Regress.sum(b))}"
puts "Mean#{sprintf("%17.4f",Regress.mean(a))}#{sprintf("%11.4f ",Regress.mean(b))}"
puts "SD#{sprintf("%19.4f",Regress.standard_deviation(a))}#{sprintf("%11.4f ",Regress.standard_deviation(b))}"
puts ""
puts "Correlation Matrix:"
puts "REG 1.0000 "
puts "A #{sprintf("%1.4f",regress.r)} 1.0000 "
puts "Variable REG A"
puts ""
#puts "Regression Equation for REG:"
#puts "REG =#{sprintf("%3.3f",Regress.slope)} A +#{sprintf("%4.4f",Regress.intercept)}"
#puts ""
puts "Significance test for prediction of REG"
puts " Mult-R R-Squared "
puts " #{sprintf("%9.4f",regress.r)} #{sprintf("%9.4f",regress.r**2)} "
|
data/lib/ruby-regress.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# Correlation and simple linear regression for two equal-length numeric
# vectors, plus a handful of descriptive statistics exposed as class methods.
class Regress
  # r::         Pearson's correlation coefficient between the two vectors.
  # slope::     Slope of the least-squares line predicting +a+ from +b+.
  # intercept:: Intercept of that line, so that a = slope * b + intercept.
  #
  # All three are NaN (or infinite) when the data has no variance or is empty.
  attr_reader :r, :slope, :intercept

  # Create a Regress object from two vectors +a+ and +b+. Note that +a+ and
  # +b+ must be of the same length; a RuntimeError is raised otherwise.
  def initialize(a,b)
    raise "Regress#initialize expects two vectors of equal length (given vectors of lengths #{a.size}, #{b.size})." if a.size != b.size

    n = a.size
    sa = Regress.sum(a)
    sb = Regress.sum(b)
    sa2 = Regress.sum(Regress.square(a))
    sb2 = Regress.sum(Regress.square(b))
    sab = Regress.multiply(a,b)

    # n-scaled covariance and variances (computational formulas).
    cov_ab = n * sab - sa * sb
    var_a = n * sa2 - sa**2
    var_b = n * sb2 - sb**2

    @r = cov_ab / ((var_a * var_b) ** 0.5)

    # Force float division so that integer input with zero variance yields
    # NaN/Infinity, like @r does, rather than raising ZeroDivisionError.
    @slope = cov_ab.to_f / var_b
    @intercept = (sa - @slope * sb) / n
  end

  # Sum of all elements of +vector+ (0 for an empty vector).
  def self.sum(vector)
    vector.inject(0) { |total,x| total + x }
  end

  # New array holding the square of each element of +vector+.
  def self.square(vector)
    vector.map { |x| x**2 }
  end

  # Sum of the pairwise products a[i] * b[i], for every index of +a+.
  def self.multiply(a,b)
    a.zip(b).inject(0) { |total,(x,y)| total + x * y }
  end

  # Smallest element of +vector+, or nil when it is empty.
  def self.min(vector)
    vector.min
  end

  # Largest element of +vector+, or nil when it is empty.
  def self.max(vector)
    vector.max
  end

  # Arithmetic mean of +vector+, always computed as a Float.
  def self.mean(vector)
    Regress.sum(vector) / vector.size.to_f
  end

  # Sample standard deviation of +vector+: the squared deviations from the
  # mean are divided by (size - 1), not by size.
  def self.standard_deviation(vector)
    mean = Regress.mean(vector)
    squared_deviations = vector.map { |x| (x - mean)**2 }
    (Regress.sum(squared_deviations) / (vector.size.to_f - 1)) ** 0.5
  end
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'ruby-regress'
|
3
|
+
require 'wrong'
|
4
|
+
|
5
|
+
# Mixin that adds an approximate-equality predicate to numeric classes.
module InDelta
  # Returns true when the receiver differs from +other+ by strictly less
  # than +delta+ (0.01 unless given).
  def in_delta?(other,delta=0.01)
    (self - other).abs < delta
  end
end

class Float
  include InDelta
end

# Mixed into Integer rather than Fixnum: Fixnum was merged into Integer in
# Ruby 2.4 and the constant was later removed, while on older Rubies Fixnum
# inherits from Integer, so integer literals get in_delta? on every version.
class Integer
  include InDelta
end
|
18
|
+
|
19
|
+
# Unit tests for Regress: raw sums, descriptive statistics, input validation
# and the correlation coefficient.
class TestRubyRegress < Test::Unit::TestCase
  include Wrong::Assert

  # Plain sums, sums of squares, and the sum of pairwise products.
  def test_sums
    first = [72,65,80,36,50,21,79,64,44,55]
    second = [78,70,81,31,55,29,74,64,47,53]

    assert { Regress.sum(first) == 566 }
    assert { Regress.sum(second) == 582 }

    first_squared = Regress.square(first)
    second_squared = Regress.square(second)
    assert { Regress.sum(first_squared) == 35344 }
    assert { Regress.sum(second_squared) == 36962 }

    assert { Regress.multiply(first,second) == 36046 }
  end

  # Min, max, sum, mean and standard deviation of one sample.
  def test_stats
    sample = [68, 71, 62, 75, 58, 60, 67, 68, 71, 69, 68, 67, 63, 62, 60, 63, 65, 67, 63, 61]

    assert { Regress.min(sample).in_delta?(58) }
    assert { Regress.max(sample).in_delta?(75) }
    assert { Regress.sum(sample).in_delta?(1308) }
    assert { Regress.mean(sample).in_delta?(65.4) }
    assert { Regress.standard_deviation(sample).in_delta?(4.4057, 0.0001) }
  end

  # Vectors of different lengths must be rejected with a descriptive message.
  def test_malformed_input
    expected = "Regress#initialize expects two vectors of equal length (given vectors of lengths 2, 3)."

    assert { rescuing { Regress.new([1,2], [1,2,3]) }.message == expected }
  end

  # This example is for population standard deviation, not sample -- so let's make sure we get it wrong!
  def test_sd_wikipedia_example
    values = [2,4,4,4,5,5,7,9]

    assert { Regress.standard_deviation(values) != 2 }
  end

  # Pearson's r for a known pair of vectors.
  def test_correlation
    xs = [68, 71, 62, 75, 58, 60, 67, 68, 71, 69, 68, 67, 63, 62, 60, 63, 65, 67, 63, 61]
    ys = [4.1, 4.6, 3.8, 4.4, 3.2, 3.1, 3.8, 4.1, 4.3, 3.7, 3.5, 3.2, 3.7, 3.3, 3.4, 4.0, 4.1, 3.8, 3.4, 3.6]

    correlation = Regress.new(xs, ys).r

    assert { correlation.in_delta?(0.73) }
  end
end
|
metadata
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ruby-regress
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Trevor Fountain
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-08-25 00:00:00 +01:00
|
19
|
+
default_executable: regress
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: wrong
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 23
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
- 2
|
33
|
+
- 0
|
34
|
+
version: 0.2.0
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id001
|
37
|
+
description: Ruby implementation of Gary Perlman's `regress` tool from (the difficult to obtain) |STAT package.
|
38
|
+
email: doches@gmail.com
|
39
|
+
executables:
|
40
|
+
- regress
|
41
|
+
extensions: []
|
42
|
+
|
43
|
+
extra_rdoc_files:
|
44
|
+
- LICENSE
|
45
|
+
- README.mdown
|
46
|
+
files:
|
47
|
+
- .document
|
48
|
+
- .gitignore
|
49
|
+
- LICENSE
|
50
|
+
- README.mdown
|
51
|
+
- Rakefile
|
52
|
+
- VERSION
|
53
|
+
- bin/regress
|
54
|
+
- lib/ruby-regress.rb
|
55
|
+
- test/helper.rb
|
56
|
+
- test/test_ruby-regress.rb
|
57
|
+
has_rdoc: true
|
58
|
+
homepage: http://github.com/doches/ruby-regress
|
59
|
+
licenses: []
|
60
|
+
|
61
|
+
post_install_message:
|
62
|
+
rdoc_options:
|
63
|
+
- --charset=UTF-8
|
64
|
+
require_paths:
|
65
|
+
- lib
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
hash: 3
|
72
|
+
segments:
|
73
|
+
- 0
|
74
|
+
version: "0"
|
75
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
76
|
+
none: false
|
77
|
+
requirements:
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
hash: 3
|
81
|
+
segments:
|
82
|
+
- 0
|
83
|
+
version: "0"
|
84
|
+
requirements: []
|
85
|
+
|
86
|
+
rubyforge_project:
|
87
|
+
rubygems_version: 1.3.7
|
88
|
+
signing_key:
|
89
|
+
specification_version: 3
|
90
|
+
summary: CLI tool for computing correlation (Pearson's r)
|
91
|
+
test_files:
|
92
|
+
- test/helper.rb
|
93
|
+
- test/test_ruby-regress.rb
|