ruby-regress 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Trevor Fountain
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,38 @@
1
+ # ruby-regress
2
+ ### A partial drop-in replacement for |STAT's regress
3
+
4
+ `ruby-regress` is a tool for computing correlations and regression equations
5
+ from two-variable input. It is designed to function as a drop-in replacement
6
+ for Gary Perlman's `regress`, at least for those who use only the basic
7
+ functionality that `regress` provides...
8
+
9
+ ## Installation
10
+
11
+ Download the most recent source from GitHub:
12
+
13
+ git clone git://github.com/doches/ruby-regress.git
14
+
15
+ then build and install the gem:
16
+
17
+ cd ruby-regress
18
+ rake build
19
+ sudo rake install
20
+
21
+ ## Usage
22
+
23
+ ruby-regress installs a single command line tool called `regress`, which
24
+ reads from `STDIN` and prints a report containing the correlation coefficient,
25
+ plus some descriptive statistics, to `STDOUT`. For example, if we have a file in
26
+ the current directory called `data.txt` containing two columns of numbers:
27
+
28
+ 1 12.0
29
+ 2 11.0
30
+ 3 13.0
31
+ 4 14.0
32
+
33
+ we can get the correlation coefficient between these two variables by:
34
+
35
+ cat data.txt | regress
36
+
37
+ which will dump a load of statistical information about the datasets to
38
+ the terminal.
@@ -0,0 +1,55 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
# Gem packaging/release tasks. Jeweler is optional: when it cannot be loaded
# we only print a hint, so the remaining tasks must not rely on anything
# Jeweler would have defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "ruby-regress"
    gem.summary = %Q{CLI tool for computing correlation (Pearson's r)}
    gem.description = %Q{Ruby implementation of Gary Perlman's `regress` tool from (the difficult to obtain) |STAT package.}
    gem.email = "doches@gmail.com"
    gem.homepage = "http://github.com/doches/ruby-regress"
    gem.authors = ["Trevor Fountain"]
    gem.bindir = "bin"
    gem.executables = %w{regress}
    gem.add_development_dependency('wrong','>=0.2.0')
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Unit tests: every test/**/test_*.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage task; falls back to a stub that explains how to install rcov.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this Rakefile (Jeweler is
# expected to provide it). Only add it as a prerequisite when it exists, so
# that `rake test` still works after the LoadError branch above was taken.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# API documentation. Current Rake no longer ships rake/rdoctask; prefer the
# task class bundled with RDoc and fall back to the legacy one.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: keep the VERSION file's trailing newline out of the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "ruby-regress #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reads two whitespace-delimited columns of numerical data from STDIN
4
+ # and outputs Pearson's r and some assorted descriptive statistics.
5
+ #
6
+ # A partial drop-in replacement for |STAT's `regress` tool, with caveats:
7
+ #
8
+ # + Does not compute SEest, F(), or prob()
9
+ # + Only handles two variables (no more!)
10
+ # + Does not calculate regression line, because I don't need it.
11
+ #
12
+ # ...and extra features:
13
+ #
14
+ # + Ignores comment lines (lines starting with '#')
15
+ # + Ignores anything on a line /after/ a '#'
16
+
17
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__),"..","lib")
18
+ require 'ruby-regress'
19
+
20
# Collect every input line that yields exactly two columns once any '#'
# comment has been removed; all other lines are skipped.
input = []
STDIN.each_line do |line|
  # /#.*/ rather than /#.+/ so a bare trailing '#' is stripped as well, and a
  # no-argument split so that runs of spaces or tabs count as one delimiter
  # (split(/\s/) produced empty fields and silently dropped aligned rows).
  cols = line.sub(/#.*/, '').split.map { |x| x.to_f }
  input.push cols if cols.size == 2
end

# Without any rows the min/max helpers return nil and the report below would
# die with a TypeError inside sprintf; exit with a readable message instead.
abort "regress: no two-column rows of data found on STDIN" if input.empty?

# Split the list of [first, second] rows into one vector per column.
a, b = input.transpose

regress = Regress.new(a,b)
puts "Analysis for #{a.size} cases of 2 variables:"
puts "Variable REG A "
puts "Min#{sprintf("%18.4f",Regress.min(a))}#{sprintf("%11.4f ",Regress.min(b))}"
puts "Max#{sprintf("%18.4f",Regress.max(a))}#{sprintf("%11.4f ",Regress.max(b))}"
puts "Sum#{sprintf("%18.4f",Regress.sum(a))}#{sprintf("%11.4f ",Regress.sum(b))}"
puts "Mean#{sprintf("%17.4f",Regress.mean(a))}#{sprintf("%11.4f ",Regress.mean(b))}"
puts "SD#{sprintf("%19.4f",Regress.standard_deviation(a))}#{sprintf("%11.4f ",Regress.standard_deviation(b))}"
puts ""
puts "Correlation Matrix:"
puts "REG 1.0000 "
puts "A #{sprintf("%1.4f",regress.r)} 1.0000 "
puts "Variable REG A"
puts ""
# The regression equation is intentionally not printed (left disabled):
#puts "Regression Equation for REG:"
#puts "REG =#{sprintf("%3.3f",Regress.slope)} A +#{sprintf("%4.4f",Regress.intercept)}"
#puts ""
puts "Significance test for prediction of REG"
puts " Mult-R R-Squared "
puts " #{sprintf("%9.4f",regress.r)} #{sprintf("%9.4f",regress.r**2)} "
@@ -0,0 +1,44 @@
1
# Pearson product-moment correlation for two equal-length numeric vectors,
# plus a few descriptive-statistics helpers exposed as class methods.
class Regress
  # +r+ is the correlation coefficient computed by ::new. +slope+ and
  # +intercept+ are exposed to callers but never assigned by this class, so
  # they currently always return nil.
  attr_reader :r, :slope, :intercept

  # Create a Regress object from two vectors +a+ and +b+. Note that +a+ and
  # +b+ must be of the same length.
  #
  # Raises a RuntimeError when the lengths differ. +r+ is NaN whenever the
  # correlation is undefined (fewer than two cases, or a constant vector).
  def initialize(a, b)
    raise "Regress#initialize expects two vectors of equal length (given vectors of lengths #{a.size}, #{b.size})." if a.size != b.size

    @r = pearson_r(a, b)
  end

  # Sum of all elements of +vector+ (0 for an empty vector).
  def self.sum(vector)
    vector.inject(0) { |total, x| total + x }
  end

  # New vector holding the square of every element of +vector+.
  def self.square(vector)
    vector.map { |x| x**2 }
  end

  # Sum of the element-wise products of +a+ and +b+; iterates over the
  # indices of +a+, so +b+ must be at least as long.
  def self.multiply(a, b)
    a.zip(b).inject(0) { |total, (x, y)| total + x * y }
  end

  # Smallest element of +vector+, or nil when it is empty.
  def self.min(vector)
    vector.min
  end

  # Largest element of +vector+, or nil when it is empty.
  def self.max(vector)
    vector.max
  end

  # Arithmetic mean of +vector+ as a Float (NaN for an empty vector).
  def self.mean(vector)
    Regress.sum(vector) / vector.size.to_f
  end

  # Sample standard deviation of +vector+ (divides by n - 1, not n).
  def self.standard_deviation(vector)
    mean = Regress.mean(vector)
    squared_deviations = vector.map { |x| (x - mean)**2 }
    (Regress.sum(squared_deviations) / (vector.size.to_f - 1)) ** 0.5
  end

  private

  # Pearson's r computed from deviations about the means. Compared with the
  # raw-sums shortcut (n*Sab - Sa*Sb, ...) this avoids subtracting two huge,
  # nearly equal numbers, which loses all precision for float data with a
  # large offset and can even make the product under the root negative
  # (a negative Float ** 0.5 is a Complex in Ruby).
  def pearson_r(a, b)
    nan = 0.0 / 0.0

    # Undefined for no data, a single case, or a variable that never varies.
    return nan if Regress.min(a) == Regress.max(a) || Regress.min(b) == Regress.max(b)

    mean_a = Regress.mean(a)
    mean_b = Regress.mean(b)
    dev_a = a.map { |x| x - mean_a }
    dev_b = b.map { |x| x - mean_b }

    # Both factors are sums of squares, hence never negative.
    spread = (Regress.sum(Regress.square(dev_a)) * Regress.sum(Regress.square(dev_b))) ** 0.5
    return nan if spread.zero?

    coefficient = Regress.multiply(dev_a, dev_b) / spread

    # Rounding can push a perfect correlation a hair outside [-1, 1].
    return 1.0 if coefficient > 1.0
    return -1.0 if coefficient < -1.0

    coefficient
  end
end
@@ -0,0 +1,9 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+
4
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ require 'ruby-regress'
7
+
8
# Reopens Test::Unit::TestCase without adding anything to it; presumably a
# placeholder for helpers shared by the test files -- TODO confirm.
+ class Test::Unit::TestCase
9
+ end
@@ -0,0 +1,64 @@
1
+ require 'helper'
2
+ require 'ruby-regress'
3
+ require 'wrong'
4
+
5
# Mixin that adds a tolerance-based comparison to numeric classes.
module InDelta
  # True when +self+ lies strictly within +delta+ (default 0.01) of +other+.
  def in_delta?(other, delta = 0.01)
    (self - other).abs < delta
  end
end

class Float
  include InDelta
end

# Integer rather than Fixnum: Fixnum was merged into Integer in Ruby 2.4 and
# removed in 3.2, where `class Fixnum` would silently create an unrelated
# class and leave integers without in_delta?. On older Rubies Fixnum inherits
# from Integer, so this covers them as well.
class Integer
  include InDelta
end
18
+
19
# Unit tests for Regress: the class-level helpers, the length check in the
# constructor, and the correlation coefficient. Assertions use Wrong's
# block form.
class TestRubyRegress < Test::Unit::TestCase
  include Wrong::Assert

  # Sample shared by the descriptive-statistics and correlation tests.
  SAMPLE = [68, 71, 62, 75, 58, 60, 67, 68, 71, 69, 68, 67, 63, 62, 60, 63, 65, 67, 63, 61]

  # Plain sums, sums of squares and the sum of products of two integer vectors.
  def test_sums
    first = [72,65,80,36,50,21,79,64,44,55]
    second = [78,70,81,31,55,29,74,64,47,53]

    assert { Regress.sum(first) == 566 }
    assert { Regress.sum(second) == 582 }

    assert { Regress.sum(Regress.square(first)) == 35344 }
    assert { Regress.sum(Regress.square(second)) == 36962 }

    assert { Regress.multiply(first,second) == 36046 }
  end

  # Minimum, maximum, sum, mean and standard deviation of SAMPLE.
  def test_stats
    values = SAMPLE

    assert { Regress.min(values).in_delta? 58 }
    assert { Regress.max(values).in_delta? 75 }
    assert { Regress.sum(values).in_delta? 1308 }
    assert { Regress.mean(values).in_delta? 65.4 }
    assert { Regress.standard_deviation(values).in_delta? 4.4057,0.0001 }
  end

  # Vectors of different lengths must be rejected with the documented message.
  def test_malformed_input
    expected = "Regress#initialize expects two vectors of equal length (given vectors of lengths 2, 3)."

    assert { rescuing { Regress.new([1,2], [1,2,3]) }.message == expected }
  end

  # The population standard deviation of this data set is 2; Regress computes
  # the sample standard deviation, so its answer has to differ from 2.
  def test_sd_wikipedia_example
    values = [2,4,4,4,5,5,7,9]

    assert { Regress.standard_deviation(values) != 2 }
  end

  # Correlation between SAMPLE and a second, float-valued vector.
  def test_correlation
    other = [4.1, 4.6, 3.8, 4.4, 3.2, 3.1, 3.8, 4.1, 4.3, 3.7, 3.5, 3.2, 3.7, 3.3, 3.4, 4.0, 4.1, 3.8, 3.4, 3.6]

    result = Regress.new(SAMPLE, other)

    assert { result.r.in_delta? 0.73 }
  end
end
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby-regress
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Trevor Fountain
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-08-25 00:00:00 +01:00
19
+ default_executable: regress
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: wrong
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 23
30
+ segments:
31
+ - 0
32
+ - 2
33
+ - 0
34
+ version: 0.2.0
35
+ type: :development
36
+ version_requirements: *id001
37
+ description: Ruby implementation of Gary Perlman's `regress` tool from (the difficult to obtain) |STAT package.
38
+ email: doches@gmail.com
39
+ executables:
40
+ - regress
41
+ extensions: []
42
+
43
+ extra_rdoc_files:
44
+ - LICENSE
45
+ - README.mdown
46
+ files:
47
+ - .document
48
+ - .gitignore
49
+ - LICENSE
50
+ - README.mdown
51
+ - Rakefile
52
+ - VERSION
53
+ - bin/regress
54
+ - lib/ruby-regress.rb
55
+ - test/helper.rb
56
+ - test/test_ruby-regress.rb
57
+ has_rdoc: true
58
+ homepage: http://github.com/doches/ruby-regress
59
+ licenses: []
60
+
61
+ post_install_message:
62
+ rdoc_options:
63
+ - --charset=UTF-8
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ hash: 3
72
+ segments:
73
+ - 0
74
+ version: "0"
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ none: false
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ hash: 3
81
+ segments:
82
+ - 0
83
+ version: "0"
84
+ requirements: []
85
+
86
+ rubyforge_project:
87
+ rubygems_version: 1.3.7
88
+ signing_key:
89
+ specification_version: 3
90
+ summary: CLI tool for computing correlation (Pearson's r)
91
+ test_files:
92
+ - test/helper.rb
93
+ - test/test_ruby-regress.rb