RubyGems - harlequin - Versions diffs - 0.0.1 → 0.0.2 - Mend

harlequin 0.0.1 → 0.0.2

Files changed (7) hide show

data/README.markdown CHANGED

@@ -1,3 +1,4 @@
+# About #
 Harlequin is a gem that allows easy access to the linear and quadratic discriminant analysis functions of R. To use harlequin, initialize a DiscriminantAnalysis object with an array of variable names for analysis, and a classification variable name as a second argument, like so:
 ```ruby
@@ -20,7 +21,7 @@ Initialize linear or quadratic analysis with ```#init_lda_analysis``` or ```#ini
 ```ruby
 analysis.init_lda_analysis
-analysis.predict(:weight => 180, :height => 68) #=> {:class=>"male", :confidence=>0.9999999999666846}
+analysis.predict(:weight => 180, :height => 68) # => {:class=>"male", :confidence=>0.9999999999666846}
 ```
 Multiple predictions can be computed at once in the same way as adding multiple training rows.
@@ -28,5 +29,10 @@ Multiple predictions can be computed at once in the same way as adding multiple
 In order to assess the effectiveness of adding a variable, the DiscriminantAnalysis class includes access to the two-sample t-test for difference in means between classes. This currently works for binary classification only.
 ```ruby
-analysis.t_test(:weight) #=> { :t_statistic=>12.0748, :degrees_of_freedom=>1.471, :p_value=>0.01898 }
-```
+analysis.t_test(:weight) # => {:t_statistic=>12.0748, :degrees_of_freedom=>1.471, :p_value=>0.01898}
+```
+# Requirements #
+A Ruby script using Harlequin requires an R instance, so make sure you have a working copy of R installed on your system. The OSX binaries for R can be found [here](http://cran.r-project.org/bin/macosx/). See the documentation for Rinruby for more details.
+You will also need the additional R packages MASS, alr3, and RWeka. These can be installed from the R command line by first choosing a mirror with ```chooseCRANmirror()``` and then installing with ```install.packages(c("MASS", "alr3", "RWeka"))```.

data/harlequin.gemspec CHANGED

@@ -21,5 +21,6 @@ Gem::Specification.new do |s|
   # specify any dependencies here
   s.add_development_dependency "rspec"
+  s.add_development_dependency "pry"
   s.add_dependency             "rinruby"
 end

data/lib/harlequin.rb CHANGED

@@ -4,6 +4,7 @@ module Harlequin
   R.echo false
   R.eval "library(MASS)"
   R.eval "library(alr3)"
+  R.eval "library(RWeka)"
   class DiscriminantAnalysis
     attr_reader :training_data, :variables, :classification_variable, :accuracy, :class_hash
@@ -14,6 +15,7 @@ module Harlequin
       @classification_variable = classification_variable
       @training_data           = []
       @class_hash              = {}
+      @current_analysis_type   = nil
     end
     def clear_training_data
@@ -31,33 +33,54 @@ module Harlequin
       end
     end
-    # Returns the class determined by linear discriminant analysis for an array of sample points.
+    # Returns the classification for an array of sample points.
     def predict(*samples)
       (variables - [classification_variable]).each do |var|
         R.assign(var.to_s + "_sample", samples.map { |s| s[var] })
       end
       sample_var_declarations = (variables - [classification_variable]).map { |var| "#{var.to_s} = #{var.to_s}_sample" }.join(',')
-      R.eval "sample_points <- data.frame(#{sample_var_declarations})"
-      R.eval "predictions <- predict(fit, sample_points)"
-      R.eval "classes <- as.numeric(predictions$class)"
-      R.eval "d <- data.frame(classes, confidence=predictions$posterior)"
-      prediction_matrix = R.pull "as.matrix(d)"
-      # This requires classes to be integers 1,2,3,...
-      # TODO: implement this without requiring specific values for sample hashes
-      predictions = prediction_matrix.to_a.map do |row|
-        classification = row.first.to_i
-        confidence = row[classification]
-        {
-          :class      => @class_hash.invert[classification],
-          :confidence => confidence
-        }
+      if @current_analysis_type == :k_nearest_neighbor
+        R.eval <<-EOF
+        sample_points <- data.frame(#{sample_var_declarations})
+        predictions <- predict(fit, sample_points)
+        classes <- round(predictions)
+        d <- data.frame(classes, confidence=predictions)
+        EOF
+        prediction_matrix = R.pull "classes"
+        predictions = prediction_matrix.map do |row|
+          classification = row.to_i
+          {
+            :class => @class_hash.invert[classification]
+          }
+        end
+        predictions.count == 1 ? predictions.first : predictions
+      else
+        R.eval <<-EOF
+        sample_points <- data.frame(#{sample_var_declarations})
+        predictions <- predict(fit, sample_points)
+        classes <- as.numeric(predictions$class)
+        d <- data.frame(classes, confidence=predictions$posterior)
+        EOF
+        prediction_matrix = R.pull "as.matrix(d)"
+        predictions = prediction_matrix.to_a.map do |row|
+          classification = row.first.to_i
+          confidence = row[classification]
+          {
+            :class      => @class_hash.invert[classification],
+            :confidence => confidence
+          }
+        end
+        predictions.count == 1 ? predictions.first : predictions
       end
-      predictions.count == 1 ? predictions.first : predictions
     end
     # Performs a test of difference of means between classes
@@ -66,7 +89,7 @@ module Harlequin
       R.eval "t_test <- t.test(#{variable.to_s} ~ #{classification_variable.to_s})"
       t_statistic        = R.pull "t_test$statistic"
-      degrees_of_freedom = R.pull "t_test$df"
+      degrees_of_freedom = R.pull "t_test$parameter"
       p_value            = R.pull "t_test$p.value"
       {
@@ -110,9 +133,22 @@ module Harlequin
           analysis_data <- data.frame(#{@var_declarations})
           fit <- #{analysis_type}(#{classification_variable.to_s} ~ #{@non_class_variables}, data=analysis_data)
         EOF
+        @current_analysis_type = analysis_type.to_sym
         compute_accuracy
       end
     end
+    def init_knn_analysis
+      init_analysis
+      R.eval <<-EOF
+        analysis_data <- data.frame(#{@var_declarations})
+        fit <- IBk(#{classification_variable.to_s} ~ ., data=analysis_data)
+      EOF
+      @current_analysis_type = :k_nearest_neighbor
+      compute_accuracy
+    end
     private
@@ -146,4 +182,4 @@ module Harlequin
   end
 end
-include Harlequin
+include Harlequin

data/lib/harlequin/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Harlequin
-  VERSION = "0.0.1"
+  VERSION = "0.0.2"
 end

data/spec/harlequin_spec.rb CHANGED

@@ -41,6 +41,14 @@ describe Harlequin::DiscriminantAnalysis do
     end
   end
+  it 'predicts for k-nearest neighbor classifiers' do
+    @discriminant_analysis.init_knn_analysis
+    samples = [@male_sample, @female_sample]
+    predictions = @discriminant_analysis.predict(*samples)
+    predictions.map { |row| row[:class] }.should eq [1,2]
+  end
   it 'clears training data from a DiscriminantAnalysis instance' do
     @discriminant_analysis.clear_training_data
     @discriminant_analysis.training_data.should be_empty

data/spec/spec_helper.rb CHANGED

@@ -1,2 +1,3 @@
 require 'harlequin'
 require 'csv'
+require 'pry'

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: harlequin
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
   prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-11-30 00:00:00.000000000Z
+date: 2012-03-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
-  requirement: &70351541259960 !ruby/object:Gem::Requirement
+  requirement: &70229741113520 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,10 +21,21 @@ dependencies:
         version: '0'
   type: :development
   prerelease: false
-  version_requirements: *70351541259960
+  version_requirements: *70229741113520
+- !ruby/object:Gem::Dependency
+  name: pry
+  requirement: &70229741113100 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *70229741113100
 - !ruby/object:Gem::Dependency
   name: rinruby
-  requirement: &70351541258940 !ruby/object:Gem::Requirement
+  requirement: &70229741112680 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -32,7 +43,7 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70351541258940
+  version_requirements: *70229741112680
 description: harlequin is a Ruby wrapper for linear and quadratic discriminant analysis
   in R for statistical classification. Also allows means testing to determine significance
   of discriminant variables.
@@ -72,7 +83,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project: harlequin
-rubygems_version: 1.8.10
+rubygems_version: 1.8.15
 signing_key:
 specification_version: 3
 summary: Wrapper for discriminant analysis methods in R