ruby-regress 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.mdown +38 -0
- data/Rakefile +55 -0
- data/VERSION +1 -0
- data/bin/regress +46 -0
- data/lib/ruby-regress.rb +44 -0
- data/test/helper.rb +9 -0
- data/test/test_ruby-regress.rb +64 -0
- metadata +93 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Copyright (c) 2009 Trevor Fountain
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
4
|
+
a copy of this software and associated documentation files (the
|
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
9
|
+
the following conditions:
|
|
10
|
+
|
|
11
|
+
The above copyright notice and this permission notice shall be
|
|
12
|
+
included in all copies or substantial portions of the Software.
|
|
13
|
+
|
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.mdown
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# ruby-regress
|
|
2
|
+
### A partial drop-in replacement for |STAT's regress
|
|
3
|
+
|
|
4
|
+
`ruby-regress` is a tool for computing correlations and regression equations
|
|
5
|
+
from two-variable input. It is designed to function as a drop-in replacement
|
|
6
|
+
for Gary Perlman's `regress`, at least for those who use only the basic
|
|
7
|
+
functionality that `regress` provides...
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
Download the most recent source from GitHub:
|
|
12
|
+
|
|
13
|
+
git clone git://github.com/doches/ruby-regress.git
|
|
14
|
+
|
|
15
|
+
then build and install the gem:
|
|
16
|
+
|
|
17
|
+
cd ruby-regress
|
|
18
|
+
rake build
|
|
19
|
+
sudo rake install
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
ruby-regress installs a single command line tool called `regress`, which
|
|
24
|
+
reads from `STDIN` and prints a report containing the correlation coefficient,
|
|
25
|
+
plus some descriptive statistics, to `STDOUT`. For example, if we have a file in
|
|
26
|
+
the current directory called `data.txt` containing two datasets:
|
|
27
|
+
|
|
28
|
+
1 12.0
|
|
29
|
+
2 11.0
|
|
30
|
+
3 13.0
|
|
31
|
+
4 14.0
|
|
32
|
+
|
|
33
|
+
we can get the correlation coefficient between these two variables by:
|
|
34
|
+
|
|
35
|
+
cat data.txt | regress
|
|
36
|
+
|
|
37
|
+
which will dump a load of statistical information about the datasets to
|
|
38
|
+
the terminal.
|
data/Rakefile
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
require 'rubygems'
require 'rake'

# Gem packaging tasks are provided by Jeweler when it is installed; without it
# the remaining tasks below are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "ruby-regress"
    gem.summary = %Q{CLI tool for computing correlation (Pearson's r)}
    gem.description = %Q{Ruby implementation of Gary Perlman's `regress` tool from (the difficult to obtain) |STAT package.}
    gem.email = "doches@gmail.com"
    gem.homepage = "http://github.com/doches/ruby-regress"
    gem.authors = ["Trevor Fountain"]
    gem.bindir = "bin"
    gem.executables = %w{regress}
    gem.add_development_dependency('wrong','>=0.2.0')
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# `rake test` runs every test/**/test_*.rb with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage task; when rcov is missing, `rake rcov` explains how to get it.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this Rakefile (presumably
# Jeweler supplies it). Define a no-op fallback so `rake test` still works
# when nothing else has provided the task.
task :check_dependencies unless Rake::Task.task_defined?(:check_dependencies)
task :test => :check_dependencies

task :default => :test

# Prefer RDoc's own task library; fall back to the legacy Rake one on old
# installations where 'rdoc/task' is not available.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: the VERSION file ends with a newline that should not leak into the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "ruby-regress #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.0.1
|
data/bin/regress
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
#!/usr/bin/env ruby

# Reads two whitespace-delimited columns of numerical data from STDIN
# and outputs Pearson's r and some assorted descriptive statistics.
#
# A partial drop-in replacement for |STAT's `regress` tool, with caveats:
#
# + Does not compute SEest, F(), or prob()
# + Only handles two variables (no more!)
# + Does not calculate regression line, because I don't need it.
#
# ...and extra features:
#
# + Ignores comment lines (lines starting with '#')
# + Ignores anything on a line /after/ a '#'

$LOAD_PATH.unshift File.join(File.dirname(__FILE__),"..","lib")
require 'ruby-regress'

# Collect every row that has exactly two fields once comments are removed.
input = []
STDIN.each_line do |line|
  # /#.*/ also removes a bare trailing '#'. Argument-less split treats any run
  # of whitespace as one separator and ignores leading/trailing whitespace, so
  # space-aligned columns are not mistaken for extra (empty) fields.
  cols = line.sub(/#.*/, '').split.map { |x| x.to_f }
  input.push cols if cols.size == 2
end

# Without data the min/max helpers return nil and sprintf would fail with a
# TypeError; stop with an explicit message instead.
abort "regress: no two-column data rows found on STDIN" if input.empty?

# Rows -> columns: a is the first column (reported as REG), b the second (A).
a, b = input.transpose

regress = Regress.new(a,b)
puts "Analysis for #{a.size} cases of 2 variables:"
puts "Variable REG A "
puts "Min#{sprintf("%18.4f",Regress.min(a))}#{sprintf("%11.4f ",Regress.min(b))}"
puts "Max#{sprintf("%18.4f",Regress.max(a))}#{sprintf("%11.4f ",Regress.max(b))}"
puts "Sum#{sprintf("%18.4f",Regress.sum(a))}#{sprintf("%11.4f ",Regress.sum(b))}"
puts "Mean#{sprintf("%17.4f",Regress.mean(a))}#{sprintf("%11.4f ",Regress.mean(b))}"
puts "SD#{sprintf("%19.4f",Regress.standard_deviation(a))}#{sprintf("%11.4f ",Regress.standard_deviation(b))}"
puts ""
puts "Correlation Matrix:"
puts "REG 1.0000 "
puts "A #{sprintf("%1.4f",regress.r)} 1.0000 "
puts "Variable REG A"
puts ""
#puts "Regression Equation for REG:"
#puts "REG =#{sprintf("%3.3f",Regress.slope)} A +#{sprintf("%4.4f",Regress.intercept)}"
#puts ""
puts "Significance test for prediction of REG"
puts " Mult-R R-Squared "
puts " #{sprintf("%9.4f",regress.r)} #{sprintf("%9.4f",regress.r**2)} "
|
data/lib/ruby-regress.rb
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Pearson correlation and simple linear regression for two equal-length
# numeric vectors, plus a handful of descriptive-statistics helpers exposed
# as class methods.
class Regress
  # r::         Pearson's correlation coefficient between the two vectors.
  # slope::     slope of the least-squares line predicting +a+ from +b+.
  # intercept:: intercept of that line, i.e. a ~ slope * b + intercept.
  #
  # All three are NaN when they are undefined (empty input, or a vector with
  # zero variance).
  attr_reader :r, :slope, :intercept

  # Create a Regress object from two vectors +a+ and +b+. Note that +a+ and
  # +b+ must be of the same length; a RuntimeError is raised otherwise.
  def initialize(a, b)
    raise "Regress#initialize expects two vectors of equal length (given vectors of lengths #{a.size}, #{b.size})." if a.size != b.size

    mean_a = Regress.mean(a)
    mean_b = Regress.mean(b)

    # Work with deviations from the mean. The raw-sums formula
    # (n*Sab - Sa*Sb) / sqrt((n*Saa - Sa^2) * (n*Sbb - Sb^2)) subtracts large,
    # nearly equal numbers and can lose all precision (even yielding a
    # negative radicand) when the data has a large offset.
    dev_a = a.map { |x| x - mean_a }
    dev_b = b.map { |x| x - mean_b }

    # to_f: the sums are Integer 0 for empty input; keep the divisions below
    # in floating point so they give NaN instead of raising ZeroDivisionError.
    ss_a = Regress.sum(Regress.square(dev_a)).to_f
    ss_b = Regress.sum(Regress.square(dev_b)).to_f
    sp_ab = Regress.multiply(dev_a, dev_b).to_f

    @r = sp_ab / Math.sqrt(ss_a * ss_b)
    @slope = sp_ab / ss_b
    @intercept = mean_a - @slope * mean_b
  end

  # Sum of the elements of +vector+ (0 for an empty vector).
  def self.sum(vector)
    vector.inject(0) { |total, x| total + x }
  end

  # New array holding the square of each element of +vector+.
  def self.square(vector)
    vector.map { |x| x**2 }
  end

  # Dot product of +a+ and +b+: the sum of a[i] * b[i] over the indices of +a+.
  def self.multiply(a, b)
    a.zip(b).inject(0) { |total, (x, y)| total + x * y }
  end

  # Smallest element of +vector+, or nil when it is empty.
  def self.min(vector)
    vector.min
  end

  # Largest element of +vector+, or nil when it is empty.
  def self.max(vector)
    vector.max
  end

  # Arithmetic mean of +vector+ as a Float (NaN for an empty vector).
  def self.mean(vector)
    Regress.sum(vector) / vector.size.to_f
  end

  # Sample standard deviation of +vector+ (divides by n - 1, not n).
  def self.standard_deviation(vector)
    mean = Regress.mean(vector)
    squared_deviations = vector.map { |x| (x - mean)**2 }
    (Regress.sum(squared_deviations) / (vector.size.to_f - 1)) ** 0.5
  end
end
|
data/test/helper.rb
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
require 'helper'
|
|
2
|
+
require 'ruby-regress'
|
|
3
|
+
require 'wrong'
|
|
4
|
+
|
|
5
|
+
# Tolerance-based numeric comparison, mixed into Float and Integer below so
# the tests can write e.g. `value.in_delta? 65.4`.
module InDelta
  # True when +self+ and +other+ differ by strictly less than +delta+
  # (default 0.01).
  def in_delta?(other, delta = 0.01)
    (self - other).abs < delta
  end
end

class Float
  include InDelta
end

# Integer rather than Fixnum: the Fixnum constant no longer exists on current
# Rubies (merged into Integer in 2.4, removed in 3.2), and on older Rubies
# Fixnum inherits from Integer, so every integer still gains in_delta?.
class Integer
  include InDelta
end
|
|
18
|
+
|
|
19
|
+
# Unit tests for Regress: the summation helpers, the descriptive statistics,
# input validation and the correlation coefficient.
class TestRubyRegress < Test::Unit::TestCase
  include Wrong::Assert

  # Plain sums, sums of squares and the dot product of two integer vectors.
  def test_sums
    first = [72,65,80,36,50,21,79,64,44,55]
    second = [78,70,81,31,55,29,74,64,47,53]

    assert { Regress.sum(first) == 566 }
    assert { Regress.sum(second) == 582 }

    assert { Regress.sum(Regress.square(first)) == 35344 }
    assert { Regress.sum(Regress.square(second)) == 36962 }

    assert { Regress.multiply(first, second) == 36046 }
  end

  # Min, max, sum, mean and standard deviation of a single vector.
  def test_stats
    values = [68, 71, 62, 75, 58, 60, 67, 68, 71, 69, 68, 67, 63, 62, 60, 63, 65, 67, 63, 61]

    assert { Regress.min(values).in_delta?(58) }
    assert { Regress.max(values).in_delta?(75) }
    assert { Regress.sum(values).in_delta?(1308) }
    assert { Regress.mean(values).in_delta?(65.4) }
    assert { Regress.standard_deviation(values).in_delta?(4.4057, 0.0001) }
  end

  # Vectors of different lengths are rejected with a descriptive message.
  def test_malformed_input
    expected = "Regress#initialize expects two vectors of equal length (given vectors of lengths 2, 3)."

    assert { rescuing { Regress.new([1,2], [1,2,3]) }.message == expected }
  end

  # This example is for population standard deviation, not sample -- so let's make sure we get it wrong!
  def test_sd_wikipedia_example
    values = [2,4,4,4,5,5,7,9]

    assert { Regress.standard_deviation(values) != 2 }
  end

  # Pearson's r for a paired data set, checked to two decimal places.
  def test_correlation
    first = [68, 71, 62, 75, 58, 60, 67, 68, 71, 69, 68, 67, 63, 62, 60, 63, 65, 67, 63, 61]
    second = [4.1, 4.6, 3.8, 4.4, 3.2, 3.1, 3.8, 4.1, 4.3, 3.7, 3.5, 3.2, 3.7, 3.3, 3.4, 4.0, 4.1, 3.8, 3.4, 3.6]

    regress = Regress.new(first, second)

    assert { regress.r.in_delta?(0.73) }
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: ruby-regress
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
hash: 29
|
|
5
|
+
prerelease: false
|
|
6
|
+
segments:
|
|
7
|
+
- 0
|
|
8
|
+
- 0
|
|
9
|
+
- 1
|
|
10
|
+
version: 0.0.1
|
|
11
|
+
platform: ruby
|
|
12
|
+
authors:
|
|
13
|
+
- Trevor Fountain
|
|
14
|
+
autorequire:
|
|
15
|
+
bindir: bin
|
|
16
|
+
cert_chain: []
|
|
17
|
+
|
|
18
|
+
date: 2010-08-25 00:00:00 +01:00
|
|
19
|
+
default_executable: regress
|
|
20
|
+
dependencies:
|
|
21
|
+
- !ruby/object:Gem::Dependency
|
|
22
|
+
name: wrong
|
|
23
|
+
prerelease: false
|
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
|
25
|
+
none: false
|
|
26
|
+
requirements:
|
|
27
|
+
- - ">="
|
|
28
|
+
- !ruby/object:Gem::Version
|
|
29
|
+
hash: 23
|
|
30
|
+
segments:
|
|
31
|
+
- 0
|
|
32
|
+
- 2
|
|
33
|
+
- 0
|
|
34
|
+
version: 0.2.0
|
|
35
|
+
type: :development
|
|
36
|
+
version_requirements: *id001
|
|
37
|
+
description: Ruby implementation of Gary Perlman's `regress` tool from (the difficult to obtain) |STAT package.
|
|
38
|
+
email: doches@gmail.com
|
|
39
|
+
executables:
|
|
40
|
+
- regress
|
|
41
|
+
extensions: []
|
|
42
|
+
|
|
43
|
+
extra_rdoc_files:
|
|
44
|
+
- LICENSE
|
|
45
|
+
- README.mdown
|
|
46
|
+
files:
|
|
47
|
+
- .document
|
|
48
|
+
- .gitignore
|
|
49
|
+
- LICENSE
|
|
50
|
+
- README.mdown
|
|
51
|
+
- Rakefile
|
|
52
|
+
- VERSION
|
|
53
|
+
- bin/regress
|
|
54
|
+
- lib/ruby-regress.rb
|
|
55
|
+
- test/helper.rb
|
|
56
|
+
- test/test_ruby-regress.rb
|
|
57
|
+
has_rdoc: true
|
|
58
|
+
homepage: http://github.com/doches/ruby-regress
|
|
59
|
+
licenses: []
|
|
60
|
+
|
|
61
|
+
post_install_message:
|
|
62
|
+
rdoc_options:
|
|
63
|
+
- --charset=UTF-8
|
|
64
|
+
require_paths:
|
|
65
|
+
- lib
|
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
67
|
+
none: false
|
|
68
|
+
requirements:
|
|
69
|
+
- - ">="
|
|
70
|
+
- !ruby/object:Gem::Version
|
|
71
|
+
hash: 3
|
|
72
|
+
segments:
|
|
73
|
+
- 0
|
|
74
|
+
version: "0"
|
|
75
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
76
|
+
none: false
|
|
77
|
+
requirements:
|
|
78
|
+
- - ">="
|
|
79
|
+
- !ruby/object:Gem::Version
|
|
80
|
+
hash: 3
|
|
81
|
+
segments:
|
|
82
|
+
- 0
|
|
83
|
+
version: "0"
|
|
84
|
+
requirements: []
|
|
85
|
+
|
|
86
|
+
rubyforge_project:
|
|
87
|
+
rubygems_version: 1.3.7
|
|
88
|
+
signing_key:
|
|
89
|
+
specification_version: 3
|
|
90
|
+
summary: CLI tool for computing correlation (Pearson's r)
|
|
91
|
+
test_files:
|
|
92
|
+
- test/helper.rb
|
|
93
|
+
- test/test_ruby-regress.rb
|